extractus / article-extractor

To extract main article from given URL with Node.js

Home Page:https://extractor-demos.pages.dev/article-extractor

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

request CommonJS version

dimaslanjaka opened this issue · comments

I have a TypeScript project that uses article-parser, but after updating, it crashed because this library now uses ESM. Please provide a compiled CommonJS version.

gambar

@dimaslanjaka hello, there is still a CommonJS version; could you try the following:

const { extract } = require('article-parser/dist/cjs/article-parser.js')
const { extract } = require('article-parser/dist/cjs/article-parser.js')

It's working, but the types are not found.
image

If I use the code below, the type definitions are found.

import { extract } from 'article-parser';

@dimaslanjaka yes, it seems that the type definitions should be copied to dist/cjs while building the CommonJS version. I'm curious about your environment: are you using TypeScript on Node.js? Which version of Node.js does your application run on?

Node 16.x latest
Typescript 4.x latest

@dimaslanjaka please try v7.0.2; I hope that copying the type definitions to the CommonJS folder helps.

I upgraded article-parser to the latest version.

causes:

  • error "NOT EXPORTED"
  • the type definitions are not resolved

image

image

My current solution is to use this snippet for CommonJS TypeScript.

using article-parser 7.0.1

declare var require: any; /* if not installed @types/node */
import { writeFileSync } from 'fs-extra';
import { join } from 'path';
import { cwd } from 'process';

(async () => {
  const parser: typeof import('article-parser') =
    await require('article-parser/dist/cjs/article-parser.js');
  const extract = parser.extract;
  extract(
    'https://zilliongamer.com/chimeraland/c/basic-materials-list/mining-materials-list'
  ).then((data) => {
    writeFileSync(
      join(
        cwd(),
        'tmp/article',
        data.title.replace(/[^a-zA-Z ]/g, '-') + '.html'
      ),
      data.content
    );
  });
})();

Please fix 7.0.2 to behave like 7.0.1, but improved with custom type definitions, or add the above snippet to the documentation. Thank you.

@dimaslanjaka hello, thank you for raising this problem.

I've created a simple package with 3 files as below:

package.json

{
  "name": "typeart",
  "version": "1.0.0",
  "main": "index.ts",
  "scripts": {
    "prestart": "npx tsc",
    "start": "node dist/index.js"
  },
  "devDependencies": {
    "typescript": "^4.8.3"
  }
}

tsconfig.json

{
  "compilerOptions": {
    "module": "commonjs",
    "esModuleInterop": true,
    "target": "es6",
    "moduleResolution": "node",
    "sourceMap": true,
    "outDir": "dist"
  },
  "lib": ["es2015"]
}

index.ts

import { extract } from 'article-parser';

(async () => {
  const data = await extract('https://zilliongamer.com/chimeraland/c/basic-materials-list/mining-materials-list')
  console.log(`extracted article: "${data.title}"`)
})();

Then I tried 4 versions of article-parser: 7.0.1, 7.0.2, 7.0.3 and 7.1.0

pnpm i

pnpm i article-parser@7.0.1
npm start # failed

pnpm i article-parser@7.0.2
npm start # failed

pnpm i article-parser@7.0.3
npm start # success

pnpm i article-parser@7.1.0
npm start # success

Screenshot from 2022-09-17 15-26-56

It seems that you can use 7.0.3 and the latest 7.1.0 without problems.

I tried your snippet with my project configuration (I am using ts-node to run it, by the way).

Using 7.0.1

image

Using 7.0.2
image
image

Using 7.0.3 (works)
image
image

tsconfig.json

{
  // Change this to match your project
  "include": [
    "*.ts",
    "./views",
    "./public",
    "./src",
    "./chimeraland",
    "./chimeraland/typings",
    "./tlon"
  ],
  "compilerOptions": {
    "module": "CommonJS",
    "target": "es2018",
    // Tells TypeScript to read JS files, as
    // normally they are ignored as source files
    "allowJs": true,
    "checkJs": false,
    // Generate d.ts files
    "declaration": true,
    // This compiler run should
    // only output d.ts files
    "emitDeclarationOnly": false,
    // Types should go into this directory.
    // Removing this would place the .d.ts files
    // next to the .js files
    "outDir": "dist",
    "esModuleInterop": true,
    "resolveJsonModule": true,
    "moduleResolution": "node",
    "allowSyntheticDefaultImports": true,
    "skipLibCheck": true,
    "skipDefaultLibCheck": true,
    "noImplicitAny": false,
    "noUnusedLocals": false,
    "allowUmdGlobalAccess": true,
    "allowUnreachableCode": true,
    "allowUnusedLabels": true,
    "noImplicitThis": false,
    "typeRoots": [
      "./node_modules/@types",
      "./tmp/typings",
      "./src/types"
    ],
    "types": [
      "datatables.net",
      "jquery",
      "node",
      "toastr",
      "bootstrap"
    ],
    "lib": [
      "DOM",
      "ES5",
      "ES6",
      "ES2015",
      "ES2016",
      "ES2017",
      "ES2018",
      "ES2019",
      "ES2020",
      "DOM.Iterable",
      "WebWorker"
    ]
  },
  "typeAcquisition": {
    "enable": true
  },
  "exclude": [
    "**/node_modules/**"
  ]
}

package.json

{
  "name": "hexo-backend",
  "version": "0.3.0",
  "description": "A sample Node.js app using Express 4",
  "engines": {
    "node": "16.x"
  },
  "main": "index.js",
  "scripts": {
    "start": "node dist/index.js",
    "dev": "cross-env-shell NODE_ENV=development node -r ts-node/register index.dev.ts",
    "prod": "cross-env-shell NODE_ENV=production node -r ts-node/register index.dev.ts",
    "fix": "git config config.pull false && git config core.autocrlf false",
    "update": "npx npm-check-updates -u -x chalk",
    "test": "node test.js",
    "push": "npm-run-all -s push:**",
    "push:build-pre": "npm run install:remote",
    "push:build": "gulp tsc copy commit",
    "push:github": "git push github hexo-backend",
    "push:heroku": "git push heroku hexo-backend:master",
    "install:local": "npm i file:../git-command-helper file:../persistent-cache --save",
    "install:remote": "npm i https://github.com/dimaslanjaka/git-command-helper https://github.com/dimaslanjaka/persistent-cache#improve2 --save",
    "gallery-deploy": "npm-run-all -s gallery gallery-push",
    "gallery": "node -r ts-node/register chimeraland/gallery/index.ts",
    "gallery-push": "node -r ts-node/register chimeraland/gallery/_deploy.test.ts",
    "heroku:pull": "git pull heroku master --recurse-submodules"
  },
  "dependencies": {
    "@algolia/autocomplete-js": "^1.7.1",
    "ansi-colors": "^4.1.3",
    "article-parser": "^7.0.3",
    "axios": "^0.27.2",
    "basic-ftp": "^5.0.2",
    "bluebird": "^3.7.2",
    "bootstrap": "^5.2.1",
    "chalk": "^1.0.0",
    "clean-css": "^5.3.1",
    "cors": "^2.8.5",
    "countries-list": "^2.6.1",
    "datatables.net": "^1.12.1",
    "debug": "^4.3.4",
    "deepmerge-ts": "^4.2.1",
    "dotenv": "^16.0.2",
    "ejs": "^3.1.8",
    "event-stream": "~4.0.1",
    "express": "^4.18.1",
    "express-session": "^1.17.3",
    "folder-hash": "^4.0.2",
    "fs-extra": "^10.1.0",
    "ftp": "^0.3.10",
    "git-command-helper": "github:dimaslanjaka/git-command-helper",
    "gulp-util": "^3.0.8",
    "hexo-util": "^2.7.0",
    "html-minifier": "^4.0.0",
    "html-minifier-terser": "^7.0.0",
    "image-validator": "^1.2.1",
    "install": "^0.13.0",
    "javascript-obfuscator": "^4.0.0",
    "jquery": "^3.6.1",
    "js-cookie": "^3.0.1",
    "jsdom": "^20.0.0",
    "kill-process-by-name": "file:packages/kill-process-by-name",
    "moment-timezone": "^0.5.37",
    "node-libcurl": "^2.3.4",
    "node-sass": "^7.0.3",
    "node-tesseract-ocr": "^2.2.1",
    "npm": "^8.19.2",
    "persistent-cache": "github:dimaslanjaka/persistent-cache#improve2",
    "select2": "^4.1.0-rc.0",
    "session-file-store": "^1.5.0",
    "sharp": "^0.31.0",
    "shell-exec": "^1.1.2",
    "ssh2-sftp-client": "^9.0.4",
    "terser": "^5.15.0",
    "through2": "^4.0.2",
    "tiny-lr": "2.0.0",
    "toastr": "^2.1.4",
    "upath": "^2.0.1"
  },
  "devDependencies": {
    "@types/bluebird": "^3.5.36",
    "@types/bootstrap": "^5.2.4",
    "@types/cors": "^2.8.12",
    "@types/ejs": "^3.1.1",
    "@types/express": "^4.17.14",
    "@types/express-session": "^1.17.5",
    "@types/folder-hash": "^4.0.2",
    "@types/fs-extra": "^9.0.13",
    "@types/ftp": "^0.3.33",
    "@types/gulp": "^4.0.9",
    "@types/gulp-util": "^3.0.36",
    "@types/hexo-util": "^0.6.5",
    "@types/html-minifier": "^4.0.2",
    "@types/html-minifier-terser": "^7.0.0",
    "@types/jquery": "^3.5.14",
    "@types/js-cookie": "^3.0.2",
    "@types/jsdom": "^20.0.0",
    "@types/moment-timezone": "^0.5.30",
    "@types/node": "^18.7.18",
    "@types/node-sass": "^4.11.3",
    "@types/prettier": "^2.7.0",
    "@types/select2": "^4.0.55",
    "@types/session-file-store": "^1.2.2",
    "@types/sharp": "^0.31.0",
    "@types/ssh2-sftp-client": "^7.1.0",
    "@types/through2": "github:dimaslanjaka/nodejs-package-types#through2",
    "@types/toastr": "^2.1.40",
    "@typescript-eslint/eslint-plugin": "^5.37.0",
    "@typescript-eslint/parser": "^5.37.0",
    "cross-env": "^7.0.3",
    "eslint": "^8.23.1",
    "eslint-config-prettier": "^8.5.0",
    "eslint-plugin-prettier": "^4.2.1",
    "got": "^12.4.1",
    "gulp": "^4.0.2",
    "nodemon": "^2.0.20",
    "npm-run-all": "^4.1.5",
    "prettier": "^2.7.1",
    "safelinkify": "file:../safelink",
    "tape": "^5.6.0",
    "ts-node": "^10.9.1",
    "typescript": "^4.8.3"
  },
  "repository": {
    "type": "git",
    "url": "https://git.heroku.com/hexo-backend"
  },
  "homepage": "https://hexo-backend.herokuapp.com/",
  "keywords": [
    "node",
    "heroku",
    "express"
  ],
  "license": "MIT"
}