N-gram text classification for large corpora
- Process large files
- Test cases
- Docker support
pyenv local
pyenv activate
python ngrams.py some-large-text.txt
# Or pipe the text in on standard input:
cat some-large-text.txt | python ngrams.py
docker build . -t ngrams
docker run -i --rm ngrams < ./texts/mobydick.txt
cat ./texts/mobydick.txt | docker run -i --rm ngrams
python tests.py