Skip to content

Commit 8bc85f6

Browse files
Feature/python 3.9 (#30)
* chg: improved publish script. * add: test for different white-space handling strategies. * add: support for Python 3.9 * add: support for Python 3.9 * add: testing of table handling. * chg: updated package metadata. * fix: issues reported by flake8 * chg: use Path for file handling. * add: improved testing. * chg: improved publish.sh based on codefactor feedback.
1 parent 53619d7 commit 8bc85f6

File tree

13 files changed

+172
-49
lines changed

13 files changed

+172
-49
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ dist/
1414
.pytest_cache/
1515
.coverage
1616
_build/
17+
.mypy_cache/

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ python:
33
- 3.5
44
- 3.7
55
- 3.8
6+
- 3.9
67

78
install:
89
- python setup.py install

publish.sh

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,29 @@
88
# - https://packaging.python.org/guides/distributing-packages-using-setuptools/#packaging-your-project
99
# - https://packaging.python.org/guides/making-a-pypi-friendly-readme/
1010

11-
# cleanup dist
12-
rm -rf ./dist
11+
VERSION=$(grep -Po "\b__version__ = '\K[^']+" src/inscriptis/__init__.py)
12+
IMAGE_NAME=inscriptis-web-service
1313

14-
# build and verify packages
15-
python3 setup.py sdist bdist_wheel; twine check dist/*
14+
case "$1" in
15+
python)
16+
# cleanup dist
17+
rm -rf ./dist
1618

17-
# upload
18-
twine upload dist/*
19+
# build and verify packages
20+
python3 setup.py sdist bdist_wheel; twine check dist/*
21+
22+
# upload
23+
twine upload dist/*
24+
;;
25+
docker)
26+
echo "Publishing ${IMAGE_NAME} in version ${VERSION}"
27+
docker login docker.pkg.github.com -u AlbertWeichselbraun --password-stdin < ../github-token.txt
28+
docker build -t ${IMAGE_NAME}:${VERSION} .
29+
30+
# Step 2: Tag
31+
docker tag ${IMAGE_NAME}:${VERSION} docker.pkg.github.com/weblyzard/inscriptis/${IMAGE_NAME}:${VERSION}
32+
33+
# Step 3: Publish
34+
#docker push docker.pkg.github.com/weblyzard/inscriptis/${IMAGE_NAME}:${VERSION}
35+
;;
36+
esac

scripts/inscript.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import argparse
88
import sys
9-
from os.path import isfile
9+
from pathlib import Path
1010

1111
import requests
1212

@@ -56,23 +56,24 @@ def get_parser():
5656

5757
if args.version:
5858
print('Inscript HTML to text conversion '
59-
'(based on the inscriptis library version {})'.format(
59+
'(based on the inscriptis library version {0})'.format(
6060
__version__))
6161
print('Copyright (C)', __copyright__)
6262
print('\nInscript comes with ABSOLUTELY NO WARRANTY.')
6363
print('This is free software and you are welcome to redistribute it '
64-
'under the terms of the {}.'.format(__license__))
64+
'under the terms of the {0}.'.format(__license__))
6565
sys.exit(0)
6666

6767
if not args.input:
6868
html_content = sys.stdin.read()
69-
elif isfile(args.input):
70-
with open(args.input, encoding=args.encoding, errors='ignore') as f:
69+
elif Path(args.input).is_file():
70+
with Path(args.input).open(encoding=args.encoding,
71+
errors='ignore') as f:
7172
html_content = f.read()
7273
elif args.input.startswith("http://") or args.input.startswith("https://"):
7374
html_content = requests.get(args.input).text
7475
else:
75-
print("ERROR: Cannot open input file '{}'.\n".format(args.input))
76+
print("ERROR: Cannot open input file '{0}'.\n".format(args.input))
7677
parser.print_help()
7778
sys.exit(-1)
7879

@@ -85,7 +86,7 @@ def get_parser():
8586
display_anchors=args.display_anchor_urls)
8687
text = get_text(html_content, config)
8788
if args.output:
88-
with open(args.output, 'w', encoding=args.encoding) as open_file:
89+
with Path(args.output).open('w', encoding=args.encoding) as open_file:
8990
open_file.write(text)
9091
else:
9192
print(text)

setup.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,18 @@
22
# -*- coding: utf-8 -*-
33

44
import sys
5+
from pathlib import Path
56
from setuptools import setup, find_packages
67
from os import path
78

8-
here = path.abspath(path.dirname(__file__))
9-
sys.path.insert(0, path.join(here, 'src'))
9+
here = Path(path.dirname(__file__)).resolve()
10+
sys.path.insert(0, path.join(str(here), 'src'))
1011

1112
from inscriptis import (__version__, __author__, __author_email__, __license__)
1213

1314

1415
# Get the long description from the README.rst file
15-
with open(path.join(here, 'README.rst')) as f: # , encoding='utf-8'
16+
with here.joinpath(Path('README.rst')).open() as f: # , encoding='utf-8'
1617
long_description = f.read()
1718

1819
setup(
@@ -25,16 +26,21 @@
2526
author_email=__author_email__,
2627
python_requires='>=3.5',
2728
classifiers=[
28-
'Development Status :: 4 - Beta',
29-
'License :: OSI Approved :: GNU General Public License v2 (GPLv2)',
30-
'Topic :: Text Processing',
31-
'Topic :: Text Processing :: Markup :: HTML',
32-
'Programming Language :: Python :: 3.5',
33-
'Programming Language :: Python :: 3.6',
34-
'Programming Language :: Python :: 3.7',
35-
'Programming Language :: Python :: 3.8',
29+
'Development Status :: 5 - Production/Stable',
30+
'Intended Audience :: Developers',
31+
'License :: OSI Approved :: GNU General Public License v2 (GPLv2)',
32+
'Topic :: Text Processing',
33+
'Topic :: Text Processing :: Markup :: HTML',
34+
'Topic :: Utilities',
35+
'Programming Language :: Python :: 3',
36+
'Programming Language :: Python :: 3.5',
37+
'Programming Language :: Python :: 3.6',
38+
'Programming Language :: Python :: 3.7',
39+
'Programming Language :: Python :: 3.8',
40+
'Programming Language :: Python :: 3.9',
3641
],
37-
url='http://github.com/weblyzard/inscriptis',
42+
keywords='HTML,converter,text',
43+
url='https://github.com/weblyzard/inscriptis',
3844
license=__license__,
3945
package_dir={'': 'src'},
4046

src/inscriptis/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
2424
__copyright__ = '2016-2020 Albert Weichselbraun, Fabian Odoni'
2525
__license__ = 'GPL2'
26-
__version__ = '1.1'
26+
__version__ = '1.1.1'
2727
__status__ = 'Prototype'
2828

2929

src/inscriptis/html_engine.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ def _write_line(self, force=False):
132132
bool -- True, if a line has been written, otherwise False.
133133
'''
134134
# only break the line if there is any relevant content
135-
if not force and (not self.current_line[-1].content or
136-
self.current_line[-1].content.isspace()):
135+
if not force and (not self.current_line[-1].content
136+
or self.current_line[-1].content.isspace()):
137137
self.current_line[-1].margin_before = \
138138
max(self.current_line[-1].margin_before,
139139
self.current_tag[-1].margin_before)
@@ -242,9 +242,9 @@ def _end_ul(self):
242242

243243
def _start_img(self, attrs):
244244
image_text = attrs.get('alt', '') or attrs.get('title', '')
245-
if image_text and not (self.config.deduplicate_captions and
246-
image_text == self.last_caption):
247-
self.current_line[-1].content += '[{}]'.format(image_text)
245+
if image_text and not (self.config.deduplicate_captions
246+
and image_text == self.last_caption):
247+
self.current_line[-1].content += '[{0}]'.format(image_text)
248248
self.last_caption = image_text
249249

250250
def _start_a(self, attrs):
@@ -259,7 +259,7 @@ def _start_a(self, attrs):
259259

260260
def _end_a(self):
261261
if self.link_target:
262-
self.current_line[-1].content += ']({})'.format(self.link_target)
262+
self.current_line[-1].content += ']({0})'.format(self.link_target)
263263

264264
def _start_ol(self, attrs):
265265
self.li_counter.append(1)
@@ -277,7 +277,7 @@ def _start_li(self, attrs):
277277
bullet = "* "
278278
if isinstance(bullet, int):
279279
self.li_counter[-1] += 1
280-
self.current_line[-1].list_bullet = "{}. ".format(bullet)
280+
self.current_line[-1].list_bullet = "{0}. ".format(bullet)
281281
else:
282282
self.current_line[-1].list_bullet = bullet
283283

src/inscriptis/model/canvas.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
'''
55
Elements used for rendering (parts) of the canvas.
66
7-
1. the :class:`Line` determines how a single line is rendered.
7+
The :class:`Line` determines how a single line is rendered.
88
'''
99

1010

@@ -66,4 +66,4 @@ def get_text(self):
6666
'\n' * self.margin_after))
6767

6868
def __str__(self):
69-
return "<Line: '{}'>".format(self.get_text())
69+
return "<Line: '{0}'>".format(self.get_text())

src/inscriptis/model/css.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ def get_style_attribute(style_attribute, html_element):
120120
key, value = (s.strip() for s in style_directive.split(':', 1))
121121

122122
try:
123-
apply_style = getattr(CssParse, "attr_" +
124-
key.replace('-webkit-', '')
123+
apply_style = getattr(CssParse, "attr_"
124+
+ key.replace('-webkit-', '')
125125
.replace("-", "_"))
126126
apply_style(value, custome_html_element)
127127
except AttributeError:
@@ -144,7 +144,7 @@ def _get_em(length):
144144
unit = _m.group(2)
145145

146146
if unit not in ('em', 'qem', 'rem'):
147-
return int(round(value/8))
147+
return int(round(value / 8))
148148
return int(round(value))
149149

150150
# ------------------------------------------------------------------------

src/inscriptis/model/table.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,14 +116,14 @@ def compute_column_width_and_height(self):
116116

117117
# determine row height
118118
for row in self.rows:
119-
max_row_height = max((len(cell.get_cell_lines())
120-
for cell in row.columns)) \
121-
if row.columns else 1
119+
max_row_height = (max((len(cell.get_cell_lines())
120+
for cell in row.columns))
121+
if row.columns else 1)
122122
for cell in row.columns:
123123
cell.height = max_row_height
124124

125125
# determine maximum number of columns
126-
max_columns = max([len(row.columns) for row in self.rows])
126+
max_columns = max((len(row.columns) for row in self.rows))
127127

128128
for column_idx in range(max_columns):
129129
# determine max_column_width

0 commit comments

Comments
 (0)