17
17
try :
18
18
# python 2 compatibility
19
19
from HTMLParser import HTMLParser
20
+ unescape = HTMLParser ().unescape
20
21
except ImportError :
21
22
from html import unescape
22
23
@@ -53,7 +54,7 @@ def __init__(self, html_tree, display_images=False, deduplicate_captions=False,
53
54
'li' : self .start_li ,
54
55
'br' : self .newline ,
55
56
'a' : self .start_a if display_links else None ,
56
- 'img' : self .start_img if display_images else None ,
57
+ 'img' : self .start_img if display_images else None ,
57
58
}
58
59
self .end_tag_handler_dict = {
59
60
'table' : self .end_table ,
@@ -78,7 +79,7 @@ def __init__(self, html_tree, display_images=False, deduplicate_captions=False,
78
79
self .current_table = []
79
80
self .li_counter = []
80
81
self .li_level = 0
81
- self .invisible = [] # a list of attributes that are considered invisible
82
+ self .invisible = [] # a list of attributes that are considered invisible
82
83
self .last_caption = None
83
84
84
85
# used if display_links is enabled
@@ -120,8 +121,8 @@ def write_line(self, force=False):
120
121
'''
121
122
# only break the line if there is any relevant content
122
123
if not force and (not self .current_line [- 1 ].content or self .current_line [- 1 ].content .isspace ()):
123
- self .current_line [- 1 ].margin_before = max (self .current_line [- 1 ].margin_before , \
124
- self .current_tag [- 1 ].margin_before )
124
+ self .current_line [- 1 ].margin_before = max (self .current_line [- 1 ].margin_before ,
125
+ self .current_tag [- 1 ].margin_before )
125
126
return False
126
127
127
128
line = self .current_line [- 1 ].get_text ()
@@ -141,7 +142,8 @@ def handle_starttag(self, tag, attrs):
141
142
142
143
cur = CSS .get (tag , Inscriptis .DEFAULT_ELEMENT )
143
144
if 'style' in attrs :
144
- cur = CssParse .get_style_attribute (attrs ['style' ], html_element = cur )
145
+ cur = CssParse .get_style_attribute (
146
+ attrs ['style' ], html_element = cur )
145
147
self .current_tag .append (cur )
146
148
if cur .display == Display .none or self .invisible :
147
149
self .invisible .append (cur )
@@ -151,10 +153,12 @@ def handle_starttag(self, tag, attrs):
151
153
# flush text before display:block elements
152
154
if cur .display == Display .block :
153
155
if not self .write_line ():
154
- self .current_line [- 1 ].margin_before = max (self .current_line [- 1 ].margin_before , cur .margin_before )
156
+ self .current_line [- 1 ].margin_before = max (
157
+ self .current_line [- 1 ].margin_before , cur .margin_before )
155
158
self .current_line [- 1 ].padding = self .next_line [- 1 ].padding
156
159
else :
157
- self .current_line [- 1 ].margin_after = max (self .current_line [- 1 ].margin_after , cur .margin_after )
160
+ self .current_line [- 1 ].margin_after = max (
161
+ self .current_line [- 1 ].margin_after , cur .margin_after )
158
162
159
163
handler = self .start_tag_handler_dict .get (tag , None )
160
164
if handler :
@@ -167,7 +171,8 @@ def handle_endtag(self, tag):
167
171
return
168
172
169
173
self .next_line [- 1 ].padding = self .current_line [- 1 ].padding - cur .padding
170
- self .current_line [- 1 ].margin_after = max (self .current_line [- 1 ].margin_after , cur .margin_after )
174
+ self .current_line [- 1 ].margin_after = max (
175
+ self .current_line [- 1 ].margin_after , cur .margin_after )
171
176
# flush text after display:block elements
172
177
if cur .display == Display .block :
173
178
# propagate the new padding to the current line, if nothing has
@@ -193,7 +198,7 @@ def handle_data(self, data):
193
198
194
199
def start_ul (self , attrs ):
195
200
self .li_level += 1
196
- self .li_counter .append (Inscriptis .get_bullet (self .li_level - 1 ))
201
+ self .li_counter .append (Inscriptis .get_bullet (self .li_level - 1 ))
197
202
198
203
def end_ul (self ):
199
204
self .li_level -= 1
@@ -220,7 +225,6 @@ def end_ol(self):
220
225
self .li_level -= 1
221
226
self .li_counter .pop ()
222
227
223
-
224
228
def start_li (self , attrs ):
225
229
self .write_line ()
226
230
if self .li_level > 0 :
@@ -287,5 +291,3 @@ def get_bullet(index):
287
291
the bullet that corresponds to the given index
288
292
'''
289
293
return Inscriptis .UL_COUNTER [index % Inscriptis .UL_COUNTER_LEN ]
290
-
291
-
0 commit comments