Bear Au Jus Text (btext) is a text/string processing tool optimized for data science and data analytics.
btext can also be used with pandas DataFrames.
- Latest Version Github Directory 1.0
- PyPI https://pypi.org/project/btext/
- Webpages https://haryobagas.github.io/btext/
Release Date : 12/23/2020 Version 1.0
- Initial Commit
Get the latest version of btext (by bearaujus):
pip install btext --upgrade
The core module contains the basic text-processing functions from bearaujus.
import btext as bt
- A.1. Converting Text to Consecutive Letter
- A.2. Converting Text to Tokenized Consecutive Letter
- A.3. Converting Text to Consecutive Number
- A.4. Converting Text to Tokenized Consecutive Number
- A.5. Converting Text to Consecutive Punctuation
- A.6. Converting Text to Tokenized Consecutive Punctuation
- A.7. Converting Text to Lower Case
- A.8. Removing Spaces
- A.9. Removing Double Spaces
- A.10. Removing Char by User Option
- A.11. Removing Char by User Desired Length
- A.12. Get All Valid Alphabet
- A.13. Get Tokenized All Valid Alphabet
- A.14. Get All Valid Number
- A.15. Get Tokenized All Valid Number
- A.16. Get All Valid Punctuation
- A.17. Get Tokenized All Valid Punctuation
- A.18. Normalizing a Text or a Collections
- A.19. Converting Object to String
def conslet(val, sep=' ')
- Example 1
text = '=,= im getting hungry~'
text = bt.conslet(text)
print(text)
->
im getting hungry
Return Type :String
- Example 2
text = '=,= im getting hungry~'
text = bt.conslet(text, sep = '~')
print(text)
->
im~getting~hungry
Return Type :String
def conslet_tokenized(val, sep=' ')
- Example 1
text = 'John Mayer, Honne, Minami, Lisa'
text = bt.conslet_tokenized(text)
print(text)
->
['John', 'Mayer', 'Honne', 'Minami', 'Lisa']
Return Type :List
- Example 2
text = 'John Mayer, Honne, Minami, Lisa'
text = bt.conslet_tokenized(text, sep = ',')
print(text)
->
['John Mayer', 'Honne', 'Minami', 'Lisa']
Return Type :List
def consnum(val, sep='')
- Example 1
text = '+62.81231.1231.123. This is random phone numbers ! -999-'
text = bt.consnum(text)
print(text)
->
62812311231123999
Return Type :String
- Example 2
text = '+62.81231.1231.123. This is random phone numbers ! -999-'
text = bt.consnum(text, sep = '-')
print(text)
->
62-81231-1231-123-999
Return Type :String
def consnum_tokenized(val)
- Example 1
text = '+62.81231.1231.123. This is random phone numbers ! -999-'
text = bt.consnum_tokenized(text)
print(text)
->
['62', '81231', '1231', '123', '999']
Return Type :List
- Example 2
text = '+62-81231-1231-123. This is random phone numbers ! ~~'
text = bt.consnum_tokenized(text)
print(text)
->
['62', '81231', '1231', '123']
Return Type :List
def conspunc(val, sep = '') :
- Example 1
text = 'Nyummy.... !!!! this is the best pancake ever :))))'
output = bt.conspunc(text)
print(output)
->
....!!!!:))))
Return Type :String
- Example 2
text = 'Nyummy.... !!!! this is the best pancake ever :))))'
output = bt.conspunc(text, sep = ' ')
print(output)
->
. . . . ! ! ! ! : ) ) ) )
Return Type :String
def conspunc_tokenized(val) :
- Example
text = 'Nyummy.... !!!! this is the best pancake ever :))))'
output = bt.conspunc_tokenized(text)
print(output)
->
['.', '.', '.', '.', '!', '!', '!', '!', ':', ')', ')', ')', ')']
Return Type :List
def lower(val) :
- Example
text = 'HeloOo WORLd !'
output = bt.lower(text)
print(output)
->
helooo world !
Return Type :String
def remove_spaces(val) :
- Example
text = 'Hel lo Wor ld'
output = bt.remove_spaces(text)
print(output)
->
HelloWorld
Return Type :String
def remove_double_spaces(val) :
- Example
text = 'Hello  World   from    Universe !'
output = bt.remove_double_spaces(text)
print(output)
->
Hello World from Universe !
Return Type :String
def removeby_char(val, exclude, sep = '') :
- Example 1
text = 'i dont like math, i dont like wasabi'
output = bt.removeby_char(text, exclude = 'dont')
print(output)
->
i like math, i like wasabi
Return Type :String
- Example 2
text = 'i dont like math, i dont like wasabi'
output = bt.removeby_char(text, exclude = 'dont', sep = 'didnt')
print(output)
->
i didnt like math, i didnt like wasabi
Return Type :String
- Example 3
text = 'i dont like math, i dont like wasabi'
output = bt.removeby_char(text, exclude = ['i dont', 'like'])
print(output)
->
math, wasabi
Return Type :String
- Example 4
text = 'i dont like math, i dont like wasabi'
output = bt.removeby_char(text, exclude = ['i dont', 'like'], sep = '~')
print(output)
->
~ ~ math, ~ ~ wasabi
Return Type :String
def removeby_length(val, exclude, sep = ' ') :
- Example 1
text = 'Hi hi hi welcome to the jungle'
output = bt.removeby_length(text, exclude = 2)
print(output)
->
welcome the jungle
Return Type :String
- Example 2
text = 'Hi hi hi welcome to the jungle'
output = bt.removeby_length(text, exclude = 2, sep = '~')
print(output)
->
welcome~the~jungle
Return Type :String
- Example 3
text = 'Hi hi hi welcome to the jungle'
output = bt.removeby_length(text, exclude = [2, 3])
print(output)
->
welcome jungle
Return Type :String
- Example 4
text = 'Hi hi hi welcome to the jungle'
output = bt.removeby_length(text, exclude = [2, 3], sep = ' HEI ')
print(output)
->
welcome HEI jungle
Return Type :String
def getall_alphabet(sep = '', include_upper = False) :
- Example 1
output = bt.getall_alphabet()
print(output)
->
abcdefghijklmnopqrstuvwxyz
Return Type :String
- Example 2
output = bt.getall_alphabet(sep = ' ')
print(output)
->
a b c d e f g h i j k l m n o p q r s t u v w x y z
Return Type :String
- Example 3
output = bt.getall_alphabet(include_upper = True)
print(output)
->
abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
Return Type :String
- Example 4
output = bt.getall_alphabet(sep = '-', include_upper = True)
print(output)
->
a-b-c-d-e-f-g-h-i-j-k-l-m-n-o-p-q-r-s-t-u-v-w-x-y-z-A-B-C-D-E-F-G-H-I-J-K-L-M-N-O-P-Q-R-S-T-U-V-W-X-Y-Z
Return Type :String
def getall_alphabet_tokenized(include_upper = False) :
- Example 1
output = bt.getall_alphabet_tokenized()
print(output)
->
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
Return Type :List
- Example 2
output = bt.getall_alphabet_tokenized(include_upper = True)
print(output)
->
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
Return Type :List
def getall_number(sep = '') :
- Example 1
output = bt.getall_number()
print(output)
->
0123456789
Return Type :String
- Example 2
output = bt.getall_number(sep = ' ')
print(output)
->
0 1 2 3 4 5 6 7 8 9
Return Type :String
def getall_number_tokenized() :
- Example
output = bt.getall_number_tokenized()
print(output)
->
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
Return Type :List
def getall_punc(sep = '') :
- Example 1
output = bt.getall_punc()
print(output)
->
!"#$%&'()*+,-./:;<=>?@[\]^_{|}~
Return Type :String
- Example 2
output = bt.getall_punc(sep = ' ')
print(output)
->
! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ { | } ~
Return Type :String
def getall_punc_tokenized() :
- Example
output = bt.getall_punc_tokenized()
print(output)
->
['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '{', '|', '}', '~']
Return Type :List
- Converting Text to Consecutive Letter
- Converting Text to Lower Case
- Removing Double Spaces
def normalize(obj, show_process = False) :
- Example 1
text = 'Nyummy8888888888888888 3235.... !!!! this is the best pancake ever :))))'
output = bt.normalize(text)
print(output)
->
nyummy this is the best pancake ever
Return Type :String
- Example 2
my_list = ['UwU this is so good :3', 'LETS GOO MAN !', 'okay you fine ! :3']
output = bt.normalize(my_list)
print(output)
->
['uwu this is so good', 'lets goo man', 'okay you fine']
Return Type :List
- Example 3
my_list = ['UwU this is so good :3', 'LETS GOO MAN !', 'okay you fine ! :3']
output = bt.normalize(my_list, show_process = True)
print(output)
Normalizing Data: [####################] 100.0% | P: 3 / 3 [ Done ]
->
['uwu this is so good', 'lets goo man', 'okay you fine']
Return Type :List
def to_string(obj) :
- Example 1
number = 125.12525215
output = bt.to_string(number)
print(output)
->
125.12525215
Return Type :String
- Example 2
my_list = ['UwU this is so good :3', 'LETS GOO MAN !', 'okay you fine ! :3']
output = bt.to_string(my_list)
print(output)
->
UwU this is so good :3 LETS GOO MAN ! okay you fine ! :3
Return Type :String
- Main Github Page : https://github.com/haryobagas/
- Linkedin : https://www.linkedin.com/in/haryobagas08/
Other documentation is a work in progress.
Bear Au Jus - ジュースとくま @2020