Skip to content

Commit 8c1f1bd

Browse files
committed
Write output files to output folders
1 parent dbd4ee6 commit 8c1f1bd

File tree

5 files changed: +6 -5 lines changed

5 files changed: +6 -5 lines changed

run_boilerpipe.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,8 @@ def main():
1616
item_id = os.path.basename(path).split('.')[0]
1717
extractor = Extractor(extractor='ArticleExtractor', html=html)
1818
output[item_id] = {'articleBody': extractor.getText()}
19-
with codecs.open('output-boilerpipe.json', 'wt', encoding='utf8') as f:
19+
with codecs.open(os.path.join('output', 'boilerpipe.json'),
20+
'wt', encoding='utf8') as f:
2021
json.dump(output, f, sort_keys=True, ensure_ascii=False, indent=4)
2122

2223

run_dragnet.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ def main():
1414
item_id = path.stem.split('.')[0]
1515
content = extract_content(html, encoding='utf8')
1616
output[item_id] = {'articleBody': content}
17-
Path('output-dragnet.json').write_text(
17+
(Path('output') / 'dragnet.json').write_text(
1818
json.dumps(output, sort_keys=True, ensure_ascii=False, indent=4),
1919
encoding='utf8')
2020

run_html_text.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ def main():
1313
html = f.read()
1414
item_id = path.stem.split('.')[0]
1515
output[item_id] = {'articleBody': html_text.extract_text(html)}
16-
Path('output-html-text.json').write_text(
16+
(Path('output') / 'html-text.json').write_text(
1717
json.dumps(output, sort_keys=True, ensure_ascii=False, indent=4),
1818
encoding='utf8')
1919

run_newspaper.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ def main():
1818
article.set_html(html)
1919
article.parse()
2020
output[item_id] = {'articleBody': article.text}
21-
Path('output-newspaper.json').write_text(
21+
(Path('output') / 'newspaper.json').write_text(
2222
json.dumps(output, sort_keys=True, ensure_ascii=False, indent=4),
2323
encoding='utf8')
2424

run_readability.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ def main():
1616
doc = Document(html)
1717
text = html_text.extract_text(doc.summary(html_partial=True))
1818
output[item_id] = {'articleBody': text}
19-
Path('output-readability.json').write_text(
19+
(Path('output') / 'readability.json').write_text(
2020
json.dumps(output, sort_keys=True, ensure_ascii=False, indent=4),
2121
encoding='utf8')
2222

0 commit comments

Comments (0)