-
Notifications
You must be signed in to change notification settings - Fork 6
renamed extract_headline to extract_article, reduced the function cal… #18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,52 +4,51 @@ | |
|
||
|
||
class Title: | ||
# Initialisations | ||
def __init__(self): | ||
self.news_url = input("\nEnter The URL : ") | ||
self.pos = [] # Variable to store all positive tokens from positive_headlines.csv file | ||
self.neg = [] # Variable to store all negative tokens from negative_headlines.csv file | ||
# self.article = newspaper.Article(news_url) | ||
|
||
# extract headline | ||
def extract_headline(self):
    """Download and parse the article found at ``self.news_url``.

    Returns:
        The parsed ``newspaper.Article``. If the library raises
        ``ArticleException`` an error message is printed and ``None`` is
        returned instead, so callers must check the result before use.
    """
    try:
        fetched = newspaper.Article(self.news_url)
        fetched.download()
        fetched.parse()
    except newspaper.article.ArticleException:
        print("\nCONNECTION/URL ERROR: Article could not be retrieved.")
        return None
    return fetched
|
||
# Adding Training/Testing Data | ||
def train(self, headline):
    """Train a Naive Bayes classifier and classify ``headline``.

    The training data is read from ``positive_headlines.csv`` and
    ``negative_headlines.csv`` in the current working directory, one
    headline per line.

    Args:
        headline: Headline text to classify.

    Returns:
        The label chosen by the classifier: ``'Positive'`` or ``'Negative'``.
    """
    def features(text):
        # Word-presence features: every token of the text maps to True.
        return {token: True for token in nltk.word_tokenize(text)}

    def load(path, label):
        # One [features, label] pair per line of the file.
        with open(path) as file:
            return [[features(sentence), label] for sentence in file]

    # Rebuild the lists instead of appending to them: appending made every
    # additional train() call duplicate the whole training set.
    self.pos = load("positive_headlines.csv", 'Positive')
    self.neg = load("negative_headlines.csv", 'Negative')

    classifier = NaiveBayesClassifier.train(self.pos + self.neg)
    return classifier.classify(features(headline))
|
||
# categorize headline | ||
def headline_category(self, headline, sentiment): | ||
print("\nHEADLINE :", headline.upper()) | ||
print("SENTIMENT :", sentiment) | ||
print("AUTHOR(S) :", self.extract_headline().authors, '\n') | ||
|
||
# main of class | ||
def main(self): | ||
hdln = self.extract_headline().title.strip() | ||
sntmnt = self.train(hdln) | ||
self.train(hdln) | ||
self.headline_category(hdln, sntmnt) | ||
|
||
|
||
# Initialisations | ||
def __init__(self, news_url=None):
    """Set up the analyser and its article object for a single URL.

    Args:
        news_url: URL of the article to analyse. When omitted, the user is
            prompted for it on standard input, which is what a bare
            ``Title()`` has always done.
    """
    if news_url is None:
        news_url = input("\nEnter The URL : ")
    self.news_url = news_url
    self.pos = []  # labelled feature sets built from positive_headlines.csv
    self.neg = []  # labelled feature sets built from negative_headlines.csv
    # Created here, but only downloaded and parsed by extract_article().
    self.article = newspaper.Article(self.news_url)
|
||
# extract headline | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
def extract_article(self): | ||
try: | ||
self.article.download() | ||
self.article.parse() | ||
return self.article | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no need to return the article since only the connection adapters need to be downloaded and parsed. Returning does not do anything since the article is already defined in |
||
except newspaper.article.ArticleException: # List possible errors in case of any exception | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For this issue; #17
|
||
print("\nCONNECTION/URL ERROR: Article could not be retrieved.") | ||
|
||
# Adding Training/Testing Data | ||
def train(self, headline):
    """Train a Naive Bayes classifier and classify ``headline``.

    The training data is read from ``positive_headlines.csv`` and
    ``negative_headlines.csv`` in the current working directory, one
    headline per line.

    Args:
        headline: Headline text to classify.

    Returns:
        The label chosen by the classifier: ``'Positive'`` or ``'Negative'``.
    """
    def features(text):
        # Word-presence features: every token of the text maps to True.
        return {token: True for token in nltk.word_tokenize(text)}

    def load(path, label):
        # One [features, label] pair per line of the file.
        with open(path) as file:
            return [[features(sentence), label] for sentence in file]

    # Rebuild the lists instead of appending to them: appending made every
    # additional train() call duplicate the whole training set.
    self.pos = load("positive_headlines.csv", 'Positive')
    self.neg = load("negative_headlines.csv", 'Negative')

    classifier = NaiveBayesClassifier.train(self.pos + self.neg)
    return classifier.classify(features(headline))
|
||
def headline_category(self, headline, sentiment): | ||
print("\nHEADLINE :", headline.upper()) | ||
print("SENTIMENT :", sentiment) | ||
print("AUTHOR(S) :", self.article.authors, '\n') | ||
|
||
# main of class | ||
def main(self): | ||
article = self.extract_article() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why would you define article here if self.article is already initiated?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer 2 |
||
headline = article.title.strip() | ||
sntmnt = self.train(headline) | ||
self.train(headline) | ||
self.headline_category(headline, sntmnt) | ||
|
||
|
||
if __name__ == '__main__':
    # Run the interactive flow once. The captured diff lists this call
    # twice (the removed and re-added copy of the same line); executing
    # both would prompt for a URL and run the analysis a second time.
    Title().main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should consider working with config files, requirements files, and README.md files from now on. This improves readability and efficiency, and minimizes the chance that someone messes with the code.