[Feature]: Integration with AIL
FafnerKeyZee opened this issue · comments
Is your feature request related to a problem? Please describe.
AIL should be able to integrate RansomLook data.
Describe the solution you'd like
Rewrite part of API in order to be easily used by AIL
Write a feeder for AIL when a new post is detected
Describe alternatives you've considered
No response
Additional context
Maybe @adulau will have some ideas.
If you need a feeder for AIL, I have made this:
You can import any .txt file with data
from pathlib import Path
from typing import Optional, List
import datetime
import sys
import json
import chardet
import requests
# Colon-separated log of every processed file, kept next to this script.
LOG_FILE: Path = Path(__file__).parent.joinpath('log-managed.txt')
# The API rejects text payloads larger than 900 000 bytes; chunk at
# 700 000 to leave a safety margin.
SIZE_THRESHOLD = 700_000
def log_managed(file_name: str, status_code: int, payload: str):
    """
    Append one entry to the managed-files log.

    Entry format (colon separated):
    upload status : datetime : file name : HTTP status code : payload

    Args:
        file_name (str): name of the uploaded file
        status_code (int): HTTP status code returned by the API
        payload (str): parsed response body (uuid on success, reason on error)
    """
    status = 'SUCCESS' if status_code == 200 else 'HTTP_ERROR'
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    fields = (status, timestamp, file_name, str(status_code), payload)
    with open(LOG_FILE, 'a') as log:
        log.write(' : '.join(fields) + '\n')
def log_encoding_error(file_name: str, detected_enc: str):
    """
    Append an ENCODING_ERROR entry (with the detected encoding) to the log.

    Args:
        file_name (str): name of the file being managed
        detected_enc (str): the detected encoding
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    # The empty field keeps the column layout aligned with log_managed
    # entries (no HTTP status code is available here).
    fields = (
        'ENCODING_ERROR',
        timestamp,
        file_name,
        '',
        f'detected encoding : {detected_enc}',
    )
    with open(LOG_FILE, 'a') as log:
        log.write(' : '.join(fields) + '\n')
def extract_text(file_path: Path) -> Optional[str]:
    """
    Detect the encoding of the target file and extract its text.

    Args:
        file_path (Path): path to the target file

    Returns:
        Optional[str]: the decoded text, or None when no usable encoding
        could be detected (the failure is logged via log_encoding_error).
    """
    raw_text = file_path.read_bytes()
    detected = chardet.detect(raw_text)['encoding']
    if detected is None:
        # chardet found no plausible encoding (e.g. binary content); the
        # original code let decode(None) raise and swallowed it silently.
        log_encoding_error(file_path.name, 'No encoding detected')
        return None
    try:
        return raw_text.decode(detected, errors='ignore')
    except (LookupError, ValueError):
        # LookupError: chardet returned a codec name unknown to Python.
        # Narrow except instead of the original bare `except:` which also
        # hid programming errors and swallowed KeyboardInterrupt.
        log_encoding_error(file_path.name, detected)
        return None
def requests_wrapper(payload: dict, file_name: str):
    """
    POST one import-item payload to the AIL API and log the outcome.

    Args:
        payload (dict): JSON body for the /api/v1/import/item endpoint
        file_name (str): name of the source file, used for logging only
    """
    # NOTE(review): 'API_KEY ' is a placeholder — supply a real key
    # (ideally from an environment variable) before running.
    header = {'Authorization': 'API_KEY ', 'Content-Type': 'application/json'}
    req = requests.post(
        'https://localhost:7000/api/v1/import/item',
        headers=header,
        data=json.dumps(payload, ensure_ascii=False).encode('utf-8'),
        verify=False,  # local AIL instance uses a self-signed certificate
        timeout=60,    # never hang forever on an unresponsive API
    )
    # Error responses are not guaranteed to be JSON (proxies, 5xx HTML
    # pages, ...): the original req.json() call could raise here, and
    # ['reason'] could KeyError on a JSON body without that field.
    try:
        body = req.json()
    except ValueError:
        body = {}
    if req.status_code == 200:
        req_payload = body.get('uuid', '')
    else:
        req_payload = body.get('reason', req.text[:200])
    log_managed(file_name, req.status_code, req_payload)
def make_payload(lines: List[str]) -> dict:
    """
    Build the JSON body for the import-item endpoint.

    Args:
        lines (List[str]): lines joined (with '\\n') into the 'text' field

    Returns:
        dict: the payload, ready to be serialised and POSTed
    """
    payload = {
        'type': 'text',
        'tags': [],
        'default_tags': True,
        'galaxy': [],
    }
    payload['text'] = '\n'.join(lines)
    return payload
def upload_file(file_path: Path):
    """
    Split the file into chunks of at most ~SIZE_THRESHOLD bytes and POST
    each chunk to the import endpoint.

    Args:
        file_path (Path): path to the file to be chunked and uploaded
    """
    text = extract_text(file_path)
    if text is None:
        # Encoding failures are already logged inside extract_text().
        # (The original called log_managed() here with a missing required
        # argument, which raised a TypeError instead of skipping the file.)
        return
    chunks = []
    acc = []
    acc_bytes = 0
    for line in text.split('\n'):
        # Keep a running UTF-8 byte count (+1 per '\n' separator) instead
        # of re-joining acc on every iteration: the original
        # sys.getsizeof('\n'.join(acc)) was quadratic in chunk size and
        # measured Python object overhead, not the payload bytes the API
        # limit actually applies to.
        if acc_bytes >= SIZE_THRESHOLD:
            chunks.append(make_payload(acc))
            acc = []
            acc_bytes = 0
        acc.append(line)
        acc_bytes += len(line.encode('utf-8')) + 1
    if acc:
        chunks.append(make_payload(acc))
    for payload in chunks:
        requests_wrapper(payload, file_path.name)
def main(source_dir: str):
    """
    Upload every regular file found in source_dir to the AIL API.

    Args:
        source_dir (str): path of the directory holding the files to import
    """
    # The local AIL instance uses a self-signed certificate; silence the
    # InsecureRequestWarning every request would otherwise emit.
    requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
    started_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    with open(LOG_FILE, 'a') as log:
        log.write(f'INFO : new run started at {started_at}\n')
        log.write('status : managed_at : file_name : http_status : payload\n')
    for entry in Path(source_dir).iterdir():
        if entry.is_file():
            upload_file(entry)
if __name__ == '__main__':
    # Require exactly one CLI argument: the source directory to import.
    args = sys.argv[1:]
    if not args:
        print('Usage : python import_local.py /path/to/source/folder')
        sys.exit(1)
    main(args[0])
All the data needed by AIL is present in the API, so we can close this issue for the moment.