主要用于日常格式化文本分析,包括分隔符文件,JSON文件,以及分隔符JSON混排文件。项目没有额外Jar包依赖,代码简洁,功能强大。可作为日常分析小工具使用。支持多种形式的数据采集格式,如JSON,分隔符,正则分隔,自定JavaScript分隔,自定Format.class分隔等。
测试文件内容如下:
## 分隔符
create table log.txt (id,name,ip,segment,num) fmt |;
## JSON格式
create table log.json (id,name,ip,segment,num) fmt json;
## Java类提取
update table log.txt (id,name,ip,segment,num) fmt format.class;
desc log.txt;
select * from log.txt;
## JavaScript脚本提取
update table log.txt (id,name,ip,segment,num) fmt format.js;
desc log.txt;
select * from log.txt;
## 正则提取
update table log.txt (id,name,ip,segment,num) fmt ~(.*?)|(.*?)|(.*?)|(.*?)|(.*);
desc log.txt more;
select * from log.txt;
# 简单查询
select name,ip from log.txt;
select name,ip from log.json;
select name,ip from log.{txt,json};
# JSON提取
select name,json_path(segment,$.service) from log.txt;
select name,json_path(segment,$.service) from log.json;
select name,json_path(segment,$.service) from log.{json,txt};
select * from log.txt where name='taobao' or name='ctrip';
select * from log.json where name='taobao' or name='ctrip';
select * from log.{txt,json} where name='taobao' or name='ctrip';
select name,sum(num) as total,avg(num),max(num),min(num),count(num) from log.txt group by name;
select name,sum(num) as total,avg(num),max(num),min(num),count(num) from log.json group by name;
select name,sum(num) as total,avg(num),max(num),min(num),count(num) from log.{txt,json} group by name;
select name,ip,num from log.txt into tmp.tb;
select name,ip,num from tmp.tb where name='taobao';
drop table log.txt;
drop table log.json;