Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merge pull request #1 from fisa712/master #2

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Prev Previous commit
Next Next commit
Create test_preprocessing.py
  • Loading branch information
fisa712 authored Feb 24, 2023
commit c9e7ae4f88f99f2be87f0964c08ea90a68be733a
27 changes: 27 additions & 0 deletions ch08/additional_features/.github/workflows/test_preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import unittest
from preprocessing import *

class TestPreprocessing(unittest.TestCase):
    """Unit tests for the text helpers imported from ``preprocessing``.

    Each test feeds a fixed string to one helper and compares the returned
    string with a hard-coded expectation.  The expectations presumably
    mirror an NLTK-style pipeline (tokens re-joined with single spaces,
    punctuation kept as separate tokens) -- confirm against the
    ``preprocessing`` module, which is not visible from this file.
    """

    def test_remove_stopwords(self):
        """Stop-word removal may only drop tokens, never add new ones."""
        text = "this is a sample text that includes some stop words such as the, and, etc."
        # The previous expectation contained the word "like", which does not
        # occur in the input, so the assertion could never hold.
        # NOTE(review): assumes "such" and "as" are both in the stop-word
        # list (they are in NLTK's English list) -- confirm.
        expected_output = "sample text includes stop words , , etc ."
        self.assertEqual(remove_stopwords(text), expected_output)

    def test_perform_lemmatization(self):
        """Check the lemmatized form of a short whitespace-separated input."""
        text = "running played plays"
        # Only "plays" is expected to change; presumably the lemmatizer
        # treats every token as a noun by default -- verify.
        expected_output = "running played play"
        self.assertEqual(perform_lemmatization(text), expected_output)

    def test_perform_stemming(self):
        """Check the stemmed form of a short whitespace-separated input."""
        text = "running played plays"
        expected_output = "run play play"
        self.assertEqual(perform_stemming(text), expected_output)

    def test_preprocess_text(self):
        """Check the combined pipeline on a two-sentence input."""
        text = "This is a sample text. It includes some stop words, and it has words in different tenses (e.g. playing, played)."
        # The expectation is lower-cased, stop-word free and stemmed
        # ("thi", "sampl", "includ", ...), with punctuation kept as tokens.
        expected_output = "thi sampl text . includ stop word , word differ tens ( e.g. play , play ) ."
        self.assertEqual(preprocess_text(text), expected_output)

# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()