提供基础爬虫接口和爬虫脚本,并集成到 Eureka,主要供异构系统调用。如需添加新的爬虫脚本,请在 jobs/tasks 目录下添加。
🏠 Homepage
- python3
- Flask
git clone https://github.com/GuoGuang/spider.git
-- Table `movie`: stores movie records (name, description, cast, images,
-- download/stream links, rating, source and bookkeeping timestamps).
-- The utf8mb4_0900_ai_ci collation used below requires MySQL 8.0 or later.
CREATE TABLE `movie` (
-- NOTE(review): `id` is declared with the legacy utf8 charset while every other
-- text column uses utf8mb4 — confirm this mismatch is intentional.
`id` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
`name` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '电影名称',
-- `desc` is a reserved word in MySQL, so it must always be backtick-quoted.
`desc` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '电影描述',
`classify` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '类别',
`actor` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '主演',
`director` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '导演',
`cover_pic` varchar(300) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '封面图',
`pics` varchar(1000) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '图片地址',
`magnet_url` varchar(5000) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '磁力下载地址',
-- Column name fixed: it was previously `online _url` (with an embedded space).
`online_url` varchar(5000) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '在线播放地址',
-- NOTE(review): stored as bigint, presumably an epoch timestamp — confirm the unit (s vs ms).
`pub_date` bigint(20) NOT NULL COMMENT '发布日期',
`rating` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '评分',
`source` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '来源',
`visits` int(11) NOT NULL DEFAULT 0 COMMENT '阅读数',
`is_recommend` int(11) NOT NULL DEFAULT 0 COMMENT '是否推荐,0不推荐,1推荐',
-- NOTE(review): update_at / create_at have no default, so every INSERT must supply
-- them; presumably epoch timestamps like pub_date — confirm against the writer.
`update_at` bigint(20) NOT NULL,
`create_at` bigint(20) NOT NULL,
PRIMARY KEY (`id`) USING BTREE,
-- Index name corrected from `idx_pu_date` so it matches the indexed column.
INDEX `idx_pub_date`(`pub_date`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
// When you add a new entity (table), auto-generate its model with flask-sqlacodegen, for example:
flask-sqlacodegen "mysql://root:123456@127.0.0.1/movie_cat" --tables user --outfile "common/models/user.py" --flask
# 使用以下命令启动爬虫
python manager.py runjob -m movie
# 使用以下命令启动Flask web
python manager.py runserver
Schedule the crawler with Linux crontab
// 编辑当前用户的 crontab 文件
crontab -e
# Crontab entry: run the movie crawler automatically at minute 0 of every hour.
# The previous schedule `* */1 * * *` fired every minute (hourly is presumed to
# be the intent of `*/1` in the hour field), and its `{ ... }` group lacked the
# `;` required before `}`, which the shell rejects as a syntax error.
# ops_config=local is exported so the job reads it from its environment.
0 * * * * export ops_config=local && python3 /Yourdirectory/manager.py runjob -m movie
👤 GuoGuang
- Twitter: @GuoGuang0536
- Github: @GuoGuang0536
Contributions, issues and feature requests are welcome!
Feel free to check the issues page.
Give a ⭐️ if this project helped you!
Copyright © 2019 GuoGuang.
This project is GuoGuang licensed.