Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

I191855 branch feat addition #168

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Create test_preprocessing.py
  • Loading branch information
fisa712 authored Feb 24, 2023
commit 47b11b39d9dac8cbe2e3fd3660894062e1b87509
27 changes: 27 additions & 0 deletions ch08/additional_features/test_preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import unittest
from preprocessing import *

class TestPreprocessing(unittest.TestCase):
    """Unit tests for the text helpers imported from ``preprocessing``.

    Each test feeds a fixed input string to one helper and compares the
    returned string with a hard-coded expected value.
    """

    def test_remove_stopwords(self):
        """remove_stopwords returns the sample sentence without stop words."""
        text = "this is a sample text that includes some stop words such as the, and, etc."
        # The input never contains the word "like", so it must not appear in
        # the expected output (the previous expectation could never match).
        # NOTE(review): assumes "such" and "as" are dropped as stop words,
        # like the other removed words -- confirm against the stop-word list
        # that remove_stopwords actually uses.
        expected_output = "sample text includes stop words , , etc ."
        self.assertEqual(remove_stopwords(text), expected_output)

    def test_perform_lemmatization(self):
        """perform_lemmatization maps "plays" to "play" and keeps the rest."""
        text = "running played plays"
        expected_output = "running played play"
        self.assertEqual(perform_lemmatization(text), expected_output)

    def test_perform_stemming(self):
        """perform_stemming reduces each sample word to its expected stem."""
        text = "running played plays"
        expected_output = "run play play"
        self.assertEqual(perform_stemming(text), expected_output)

    def test_preprocess_text(self):
        """preprocess_text returns the expected string for a full sentence."""
        text = "This is a sample text. It includes some stop words, and it has words in different tenses (e.g. playing, played)."
        # NOTE(review): this expectation keeps "thi" (from "This") but drops
        # "it" and "has"; confirm it matches the order in which
        # preprocess_text lowercases, removes stop words and stems.
        expected_output = "thi sampl text . includ stop word , word differ tens ( e.g. play , play ) ."
        self.assertEqual(preprocess_text(text), expected_output)

# Run the test suite only when this file is executed as a script,
# not when it is imported by another module or a test runner.
if __name__ == '__main__':
    unittest.main()