Rename `self._pat_str` to `self.pattern` (it holds a compiled regex, not a string)

jmwdpk · Feb 16, 2024 · f603fcd · f603fcd
1 parent fce2415
commit f603fcd
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/bpe_regex.py b/bpe_regex.py
@@ -54,14 +54,14 @@ def __init__(self):
         # default to vocab size of 256 (all bytes), no merges and gpt-4 pattern
         self.merges = {}
         self.vocab = {idx: bytes([idx]) for idx in range(256)}
-        self._pat_str = re.compile(GPT4_SPLIT_PATTERN)
+        self.pattern = re.compile(GPT4_SPLIT_PATTERN)
 
     def train(self, text, vocab_size, verbose=False):
         assert vocab_size >= 256
         num_merges = vocab_size - 256
 
         # split the text up into text chunks
-        text_chunks = re.findall(self._pat_str, text)
+        text_chunks = re.findall(self.pattern, text)
 
         # input text preprocessing
         ids = [list(ch.encode("utf-8")) for ch in text_chunks]
@@ -121,7 +121,7 @@ def _encode_chunk(self, text):
 
     def encode(self, text):
         # split text into chunks of text by categories defined in regex pattern
-        text_chunks = re.findall(self._pat_str, text)
+        text_chunks = re.findall(self.pattern, text)
         # all chunks of text are encoded separately, then results are joined
         ids = []
         for chunk in text_chunks: