# Copyright(C) 2011 Pierre Mazière
#
# This file is part of woob.
#
# woob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# woob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with woob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations
import hashlib
import tempfile
from typing import IO, ClassVar, TYPE_CHECKING
if TYPE_CHECKING:
from woob.browser import Browser
try:
from PIL import Image
except ImportError:
raise ImportError('Please install python-imaging')
class VirtKeyboardError(Exception):
    """Error raised by the virtual keyboard helpers of this module."""
class VirtKeyboard:
    """
    Handle a virtual keyboard.

    The keyboard is an image in which each "key" occupies a rectangle.
    Every key is narrowed to the bounding box of the pixels matching
    :attr:`color`, then hashed, so that a symbol can be looked up by the
    md5 of its shape.
    """

    margin = None
    """
    Margin used by :meth:`get_symbol_coords` to reduce size
    of each "key" of the virtual keyboard. This attribute is always
    converted to a 4-tuple, and has the same semantic as the CSS
    ``margin`` property (top, right, bottom, left), in pixels.
    """

    codesep = ''
    """
    Output separator between code strings.

    See :func:`get_string_code`.
    """

    def __init__(self, file=None, coords=None, color=None, convert=None):
        """
        :param file: virtual keyboard image
        :param coords: dictionary <value to return>:<tuple(x1,y1,x2,y2)>
        :param color: color of the symbols in the image; depending on the
                      image, it can be a single value or a tuple
        :param convert: if not None, convert image to this target type
                        (for example 'RGB')
        """
        if file is not None:
            # 0 is a legitimate color for single-band images (load_image
            # accepts any int there), so only a missing color is rejected.
            assert color is not None, 'No color provided !'
            self.load_image(file, color, convert)

        # Normalize the margin to a 4-tuple, following the CSS shorthand rules.
        if isinstance(self.margin, (int, float)):
            self.margin = (self.margin,) * 4
        elif self.margin is not None:
            if len(self.margin) == 2:
                self.margin = self.margin + self.margin
            elif len(self.margin) == 3:
                self.margin = self.margin + (self.margin[1],)
            assert len(self.margin) == 4

        if coords is not None:
            self.load_symbols(coords)

    def load_image(self, file, color, convert=None):
        """
        Open the keyboard image and remember its pixels, size and bands.

        :raises VirtKeyboardError: if the number of components of ``color``
                                   does not match the number of image bands
        """
        self.image = Image.open(file)

        if convert is not None:
            self.image = self.image.convert(convert)

        self.bands = self.image.getbands()
        if isinstance(color, int) and not isinstance(self.bands, str) and len(self.bands) != 1:
            raise VirtKeyboardError("Color requires %i component but only 1 is provided"
                                    % len(self.bands))
        if not isinstance(color, int) and len(color) != len(self.bands):
            raise VirtKeyboardError("Color requires %i components but %i are provided"
                                    % (len(self.bands), len(color)))
        self.color = color

        self.width, self.height = self.image.size
        self.pixar = self.image.load()

    def load_symbols(self, coords):
        """
        Compute the narrowed coordinates and the md5 of each key.

        Keys without any pixel of the symbol color are skipped.
        """
        self.coords = {}
        self.md5 = {}
        for i in coords:
            coord = self.get_symbol_coords(coords[i])
            # (-1, -1, -1, -1) means no matching pixel was found in the key.
            if coord == (-1, -1, -1, -1):
                continue
            self.coords[i] = coord
            self.md5[i] = self.checksum(self.coords[i])

    def check_color(self, pixel):
        """Tell whether ``pixel`` has the color of the symbols."""
        return pixel == self.color

    def get_symbol_coords(self, coords):
        """Return narrow coordinates around symbol."""
        (x1, y1, x2, y2) = coords
        if self.margin:
            top, right, bottom, left = self.margin
            x1, y1, x2, y2 = x1 + left, y1 + top, x2 - right, y2 - bottom

        # First and last lines containing at least one matching pixel.
        newY1 = -1
        newY2 = -1
        for y in range(y1, min(y2 + 1, self.height)):
            empty_line = True
            for x in range(x1, min(x2 + 1, self.width)):
                if self.check_color(self.pixar[x, y]):
                    empty_line = False
                    if newY1 < 0:
                        newY1 = y
                    break
            if newY1 >= 0 and not empty_line:
                newY2 = y

        # First and last columns containing at least one matching pixel.
        newX1 = -1
        newX2 = -1
        for x in range(x1, min(x2 + 1, self.width)):
            empty_column = True
            for y in range(y1, min(y2 + 1, self.height)):
                if self.check_color(self.pixar[x, y]):
                    empty_column = False
                    if newX1 < 0:
                        newX1 = x
                    break
            if newX1 >= 0 and not empty_column:
                newX2 = x

        return (newX1, newY1, newX2, newY2)

    def checksum(self, coords):
        """
        Return the md5 hex digest of the symbol shape inside ``coords``.

        The area is scanned line by line; a matching pixel contributes
        ``b"."`` and any other pixel ``b" "``.
        """
        (x1, y1, x2, y2) = coords
        cells = (
            b"." if self.check_color(self.pixar[x, y]) else b" "
            for y in range(y1, min(y2 + 1, self.height))
            for x in range(x1, min(x2 + 1, self.width))
        )
        return hashlib.md5(b"".join(cells)).hexdigest()

    def get_symbol_code(self, all_known_md5_for_symbol):
        """
        Return the code of the key whose md5 is one of the given hashes.

        :param all_known_md5_for_symbol: a single md5 or an iterable of md5s
        :raises VirtKeyboardError: if no key of the keyboard matches
        """
        if isinstance(all_known_md5_for_symbol, str):
            all_known_md5_for_symbol = [all_known_md5_for_symbol]

        current_md5_in_keyboard = self.md5
        for known_md5 in all_known_md5_for_symbol:
            for code, cur_md5 in current_md5_in_keyboard.items():
                if known_md5 == cur_md5:
                    return code

        raise VirtKeyboardError(
            'Code not found for these hashes "%s".'
            % all_known_md5_for_symbol)

    def get_string_code(self, string):
        """
        Return the codes of each character of ``string``, joined by
        :attr:`codesep`.

        The md5s of each character are read from ``self.symbols``, which
        this class does not define itself.
        """
        return self.codesep.join(self.get_symbol_code(self.symbols[c]) for c in string)

    def check_symbols(self, symbols, dirname):
        """
        Check that every symbol can be found on the keyboard.

        :param symbols: dictionary <symbol>:<md5 value>
        :param dirname: directory where images are dumped on failure; a
                        temporary directory is created when it is None
        :raises VirtKeyboardError: on the first symbol that is not found
        """
        for s in symbols:
            try:
                self.get_symbol_code(symbols[s])
            except VirtKeyboardError:
                if dirname is None:
                    dirname = tempfile.mkdtemp(prefix='woob_session_')
                self.generate_MD5(dirname)
                raise VirtKeyboardError("Symbol '%s' not found; all symbol hashes are available in %s"
                                        % (s, dirname))

    def generate_MD5(self, dir):
        """
        Save each key as ``<md5>.png`` and the whole keyboard as
        ``image.png`` in directory ``dir``.
        """
        for i in self.coords:
            width = self.coords[i][2] - self.coords[i][0] + 1
            height = self.coords[i][3] - self.coords[i][1] + 1
            img = Image.new(''.join(self.bands), (width, height))
            matrix = img.load()
            for y in range(height):
                for x in range(width):
                    matrix[x, y] = self.pixar[self.coords[i][0] + x, self.coords[i][1] + y]
            img.save(dir + "/" + self.md5[i] + ".png")
        self.image.save(dir + "/image.png")
class MappedVirtKeyboard(VirtKeyboard):
    """
    Virtual keyboard whose keys are described by an HTML image map.

    The ``usemap`` attribute of ``img_element`` gives the identifier of a
    ``<map>`` element of ``document`` (looked up by ``id``, then by
    ``name``). Each ``<area>`` of that map provides the rectangle of a key
    through its ``coords`` attribute, and the code of the key through the
    attribute named ``map_attr``.

    :param file: virtual keyboard image
    :param document: document in which the ``<map>`` element is searched
    :param img_element: element carrying the ``usemap`` attribute
    :param color: color of the symbols in the image
    :param map_attr: name of the ``<area>`` attribute used as key code
    :param convert: if not None, convert image to this target type
    :raises VirtKeyboardError: if ``usemap`` is missing or if no matching
                               ``<map>`` element is found
    """

    def __init__(self, file, document, img_element, color, map_attr="onclick", convert=None):
        usemap = img_element.attrib.get("usemap")
        if usemap is None:
            raise VirtKeyboardError('Image element has no "usemap" attribute.')

        # Drop the first character of the reference (the leading "#").
        map_id = usemap[1:]
        map_element = document.find('//map[@id="%s"]' % map_id)
        if map_element is None:
            map_element = document.find('//map[@name="%s"]' % map_id)
        if map_element is None:
            raise VirtKeyboardError('No map element found with id or name "%s".' % map_id)

        coords = {}
        for area in map_element.iter("area"):
            code = area.attrib.get(map_attr)
            # Only the part before the first space is parsed as coordinates.
            raw_coords = area.attrib.get("coords").split(' ')[0]
            coords[code] = tuple(int(value) for value in raw_coords.split(','))

        super().__init__(file, coords, color, convert)
class GridVirtKeyboard(VirtKeyboard):
    """
    Make a virtual keyboard where "keys" are distributed on a grid.
    Example here: https://www.e-sgbl.com/portalserver/sgbl-web/login

    :param symbols: Sequence of symbols, ordered in the grid from left to
                    right and top to bottom
    :type symbols: iterable
    :param cols: Column count of the grid
    :type cols: int
    :param rows: Row count of the grid
    :type rows: int
    :param image: File-like object to be used as data source
    :type image: file
    :param color: Color of the meaningful pixels
    :type color: 3-tuple
    :param convert: Mode to which convert color of pixels, see
                    :meth:`Image.Image.convert` for more information
    """

    symbols = {}
    """Association table between symbols and md5s"""

    def __init__(self, symbols, cols, rows, image, color, convert=None):
        # The image has to be loaded first: the tile size depends on its size.
        self.load_image(image, color, convert)

        tileW = self.width / cols
        tileH = self.height / rows

        coords = {}
        for i, s in enumerate(symbols):
            # Derive the column from the integer index instead of taking a
            # float modulo of the pixel offset, which is exposed to rounding
            # when the tile width is not an integer.
            x = (i % cols) * tileW
            y = (i // cols) * tileH
            coords[s] = tuple(int(v) for v in (x, y, x + tileW, y + tileH))

        super().__init__()

        self.load_symbols(coords)
class SplitKeyboard:
    """Virtual keyboard for when the chars are in individual images, not a single grid"""

    char_to_hash = None
    """dict mapping password characters to image hashes"""

    codesep = ''
    """Output separator between symbols"""

    def __init__(self, code_to_filedata):
        """Create a SplitKeyboard

        :param code_to_filedata: dict mapping site codes to images data
        :type code_to_filedata: dict[str, bytes]
        :raises VirtKeyboardError: if a character of :attr:`char_to_hash`
                                   matches none of the given images; the
                                   images are dumped in a temporary
                                   directory beforehand
        """
        hash_to_code = {
            self.checksum(data): code for code, data in code_to_filedata.items()
        }

        self.char_to_code = {}
        for char, hashes in self.char_to_hash.items():
            if isinstance(hashes, str):
                hashes = (hashes,)

            found = next((h for h in hash_to_code if h in hashes), None)
            if found is None:
                path = tempfile.mkdtemp(prefix='woob_session_')
                self.dump(code_to_filedata.values(), path)
                raise VirtKeyboardError("Symbol '%s' not found; all symbol hashes are available in %s" % (char, path))

            # An image can only be attributed to a single character.
            self.char_to_code[char] = hash_to_code.pop(found)

    def checksum(self, buffer):
        """Return the md5 hex digest of ``buffer`` once passed to :meth:`convert`."""
        return hashlib.md5(self.convert(buffer)).hexdigest()

    def dump(self, files, path):
        """
        Write each image data of ``files`` as ``<hash>.png`` in ``path``.

        File names are computed with :meth:`checksum`, so that they are the
        hashes compared against :attr:`char_to_hash`, even when
        :meth:`convert` is overridden.
        """
        for dat in files:
            md5 = self.checksum(dat)
            with open('%s/%s.png' % (path, md5), 'wb') as fd:
                fd.write(dat)

    def get_string_code(self, password):
        """
        Return the site codes of each character of ``password``, joined by
        :attr:`codesep`.
        """
        return self.codesep.join(self.char_to_code[c] for c in password)

    def convert(self, buffer):
        """
        Transform image data before hashing it.

        The default implementation returns ``buffer`` unchanged.
        """
        return buffer

    @classmethod
    def create_from_url(cls, browser, code_to_url):
        """
        Create a keyboard by downloading each image with ``browser``.

        :param browser: object whose ``open(url)`` returns a response with
                        a ``content`` attribute
        :param code_to_url: mapping of site codes to image URLs, or iterable
                            of ``(code, url)`` pairs
        """
        # Iterating a mapping directly yields its keys only, which cannot be
        # unpacked as (code, url) pairs.
        if hasattr(code_to_url, 'items'):
            pairs = code_to_url.items()
        else:
            pairs = code_to_url

        code_to_file = {
            code: browser.open(url).content for code, url in pairs
        }
        return cls(code_to_file)
class Tile:
    """Tile of a image grid for SimpleVirtualKeyboard"""

    def __init__(self, matching_symbol, coords, image=None, md5=None):
        """
        Store the website symbol matched by the tile and its coordinates,
        along with its optional image and md5.
        """
        self.matching_symbol, self.coords = matching_symbol, coords
        self.image, self.md5 = image, md5
class SimpleVirtualKeyboard:
    """Handle a virtual keyboard where "keys" are distributed on a simple grid.

    :param cols: Column count of the grid
    :param rows: Row count of the grid
    :param file: File-like object to be used as data source
    :param convert: Mode to which convert color of pixels, see
                    :meth:`Image.Image.convert` for more information
    :param matching_symbols: symbol that match all case of image grid from left to right and top
                             to down, European reading way.
    :param matching_symbols_coords: dict mapping matching website symbols to their image coords
                                    (x0, y0, x1, y1) on grid image from left to right and top to
                                    down, European reading way. It's not symbols in the image.
    :param browser: Browser of woob session.
                    Allow to dump tiles files in same directory than session folder
    """

    codesep: ClassVar[str] = ''
    """Output separator between matching symbols"""

    margin: ClassVar[tuple[int, int, int, int] | tuple[int, int] | int | None] = None
    """
    4-tuple(int), same as HTML margin: (top, right, bottom, left).
    or 2-tuple(int), (top = bottom, right = left),
    or int, top = right = bottom = left
    """

    tile_margin: ClassVar[tuple[int, int, int, int] | tuple[int, int] | int | None] = None
    """
    4-tuple(int), same as HTML margin: (top, right, bottom, left).
    or 2-tuple(int), (top = bottom, right = left),
    or int, top = right = bottom = left
    """

    symbols: ClassVar[dict[str, str | tuple[str, ...]]] = None
    """
    Association table between image symbols and md5s
    """

    convert: ClassVar[str | None] = None
    """
    Mode to which convert color of pixels, see
    :meth:`Image.Image.convert` for more information
    """

    tile_klass = Tile

    def __init__(
        self,
        file: IO,
        cols: int,
        rows: int,
        matching_symbols: list[str] | None = None,
        matching_symbols_coords: dict[str, tuple[int, int, int, int]] | None = None,
        browser: Browser | None = None
    ):
        self.cols = cols
        self.rows = rows

        # Needed even if __init__ is overridden
        self.path = self.build_path(browser)

        # Get self.image
        self.load_image(file, self.margin, self.convert)

        # Get self.tiles
        self.get_tiles(
            matching_symbols=matching_symbols,
            matching_symbols_coords=matching_symbols_coords
        )

        # Tiles processing
        self.cut_tiles(self.tile_margin)
        self.hash_md5_tiles()

    def build_path(self, browser=None):
        """
        Return the directory in which tiles are dumped.

        It is ``browser.responses_dirname`` when available, else a newly
        created temporary directory.
        """
        if browser and browser.responses_dirname:
            return browser.responses_dirname
        else:
            return tempfile.mkdtemp(prefix='woob_session_')

    def load_image(self, file, margin=None, convert=None):
        """
        Open the image, then cut its margin and convert it when requested.

        Sets ``self.image``, ``self.width`` and ``self.height``.
        """
        self.image = Image.open(file)
        # Resize image if margin is given
        if margin:
            self.image = self.cut_margin(self.image, margin)
        if convert:
            self.image = self.image.convert(convert)
        # Give possibility to alter image before get tiles, overwrite :func:`alter_image`.
        self.alter_image()
        self.width, self.height = self.image.size

    def alter_image(self):
        """Hook called by :meth:`load_image`; does nothing by default."""
        pass

    def cut_margin(self, image, margin):
        """
        Return ``image`` cropped by ``margin``.

        :param margin: int, 2-tuple or 4-tuple, see :attr:`margin`
        :raises AssertionError: if the margin format is not supported, or if
                                the margin is at least as large as the image
        """
        width, height = image.size

        # Verify the margin value format
        if isinstance(margin, int):
            margin = (margin,) * 4
        elif len(margin) == 2:
            margin = (margin[0], margin[1], margin[0], margin[1])
        elif len(margin) != 4:
            # Raised explicitly rather than through an always-false ``assert``
            # so the check is not stripped under ``python -O``.
            raise AssertionError("Margin format is wrong.")

        assert (margin[0] + margin[2]) < height and (margin[1] + margin[3]) < width, \
            "Margin is too high, there is not enough pixel to cut."

        # Crop box is (left, upper, right, lower)
        image = image.crop((
            0 + margin[3],
            0 + margin[0],
            width - margin[1],
            height - margin[2]
        ))
        return image

    def get_tiles(self, matching_symbols=None, matching_symbols_coords=None):
        """
        Build ``self.tiles``.

        When ``matching_symbols_coords`` is given, one tile is created per
        entry with the given coords. Otherwise, the image is split in a
        regular grid of ``cols`` x ``rows`` tiles, associated in order to
        ``matching_symbols`` (by default, the string form of their index).
        """
        self.tiles = []

        # Tiles coords are given
        if matching_symbols_coords:
            for matching_symbol in matching_symbols_coords:
                self.tiles.append(
                    self.tile_klass(
                        matching_symbol=matching_symbol,
                        coords=matching_symbols_coords[matching_symbol]
                    )
                )
            return

        assert not self.width % self.cols and not self.height % self.rows, \
            "Image width and height are not multiple of cols and rows. Please resize image with attribute `margin`."

        # Tiles coords aren't given, calculate them
        self.tileW = self.width // self.cols
        self.tileH = self.height // self.rows

        # Matching symbols aren't given, default value is range(columns*rows)
        if not matching_symbols:
            matching_symbols = ['%s' % i for i in range(self.cols * self.rows)]

        assert len(matching_symbols) == (self.cols * self.rows), \
            "Number of website matching symbols is not equal to the number of cases on the image."

        # Calculate tiles coords for each matching symbol from 1-dimension to 2-dimensions
        for index, matching_symbol in enumerate(matching_symbols):
            coords = self.get_tile_coords_in_grid(index)
            self.tiles.append(self.tile_klass(matching_symbol=matching_symbol, coords=coords))

    def get_tile_coords_in_grid(self, case_index):
        """Return the coords (x0, y0, x1, y1) of the tile at ``case_index`` in the grid."""
        # Get the top left pixel coords of the tile
        x0 = (case_index % self.cols) * self.tileW
        y0 = (case_index // self.cols) * self.tileH

        # Get the bottom right coords of the tile
        x1 = x0 + self.tileW
        y1 = y0 + self.tileH

        return (x0, y0, x1, y1)

    def cut_tiles(self, tile_margin=None):
        """Crop each tile from the image, then cut ``tile_margin`` from it if given."""
        for tile in self.tiles:
            tile.image = self.image.crop(tile.coords)
            # Resize tile if margin is given
            if tile_margin:
                tile.image = self.cut_margin(tile.image, tile_margin)

    def hash_md5_tiles(self):
        """Set the md5 of each tile from the bytes of its image."""
        for tile in self.tiles:
            tile.md5 = hashlib.md5(tile.image.tobytes()).hexdigest()

    def dump_tiles(self, path):
        """Save each tile image as ``<md5>.png`` in ``path``."""
        for tile in self.tiles:
            tile.image.save('{}/{}.png'.format(path, tile.md5))

    def get_string_code(self, password):
        """
        Return the matching symbols of each character of ``password``,
        joined by :attr:`codesep`.

        :raises VirtKeyboardError: if no tile matches a character; tiles are
                                   dumped in ``self.path`` beforehand
        """
        word = []

        for digit in password:
            for tile in self.tiles:
                if tile.md5 in self.symbols[digit]:
                    word.append(tile.matching_symbol)
                    break
            else:
                # Dump file only if the symbol is not found
                self.dump_tiles(self.path)
                raise VirtKeyboardError(
                    "Symbol '%s' not found; all symbol hashes are available in %s" % (digit, self.path)
                )
        return self.codesep.join(word)