// npm 패키지
arjson
Bit-level JSON encoder + delta-chain protocol (weavepack-json reference implementation). Smaller than MessagePack/CBOR for structured/repetitive JSON; ships per-payload-addressable chains for storing edit histories.
주간
312
월간
813
버전
24
메인테이너
1
라이선스
MIT
최초 publish
2025-03-14
publisher
asteroiddao
tarball
1,327,605 B
AUTO-PUBLISHED·1개 버전 인덱싱됨·최근 publish: 2026-05-27
// publisher 캠페인 · by asteroiddao
이 계정에서 catch된 패키지 9건 — 고립된 catch가 아닙니다. 동일 publisher가 8개의 다른 패키지를 추가로 발행했고, 모두 파이프라인이 catch했습니다. 이는 일회성이 아닌 조직적 캠페인의 형태입니다. 아래 링크는 각 형제 catch의 분석으로 이동합니다.
// offending code· @0.1.5· 1 file flagged
- @0.1.5 · AUTO-PUBLISHED · publisher: asteroiddao · heuristic 75/100 · static flags 1 · llm skipped · mature-package · has-source-repo · osv-flagged:MAL-2026-5189 · public-github-push
// offending code · 1 file flagged · patterns: 1
--- package/package.json (excerpt) --- { "name": "arjson", "version": "0.1.5", "description": "Bit-level JSON encoder + delta-chain protocol (weavepack-json reference implementation). Smaller than MessagePack/CBOR for structured/repetitive JSON; ships per-payload-addressable chains for storing edit histories.", "keywords": [ "json", "encoding", "compression", "weavepack", "delta", "msgpack-alternative", "cbor-alternative", "bit-packing" ], "homepage": "https://github.com/weavedb/arjson", "repository": { "type": "git", "url": "https://github.com/weavedb/arjson.git" }, "license": "MIT", "type": "module", "main": "dist/cjs/index.js", "module": "dist/src/index.js", "bin": { "wpkt-json": "bin/wpkt-json.js" }, "scripts": { "build:cjs": "babel src --out-dir dist/cjs --config-file ./.babelrc-cjs", "build": "rm -rf dist && npm run build:cjs && cp src -rf dist/esm && cp bin -rf dist/ && node make.js && cp .npmignore dist/", "test": "node --test test/test.js test/delta.js test/edge-cases.js test/brackets.test.js test/modular.test.js test/regression.test.js test/extension-gate.test.js test/fuzz.test.js test/golden.test.js test/unit.test.js test/api.test.js test/matrix.test.js test/interface-lock.test.js test/delta-invariants.test.js test/null-profile.test.js test/tensor-profile.test.js test/tensor-half.test.js test/tensor-fp8.test.js test/tensor-region-replace.test.js test/tensor-skip-load.test.js test/tensor-stream.test.js test/wpkt-json-cl --- bundled output (OSV-MAL flagged — LLM scope expansion) --- --- test/bench-beat-json-brotli.js (bundled) --- // Goal: beat json+brotli on raw size by exploiting ARJSON's strmap as a // deterministic per-document dictionary fed to a downstream compressor. // // Pipelines tested: // A. json (raw) // B. json + brotli (the target to beat — has 120 KB built-in text dict) // C. arjson + brotli (our previous baseline) // D. arjson + zstd (no dict) // E. arjson + zstd with strmap-derived dictionary (the new pipeline) // F. 
arjson + brotli with strmap as raw prefix (cheaper variant) // // Workloads: per-document AND a homogeneous corpus of 100 user records, // since that's where json+brotli was beating us. import { gzipSync, brotliCompressSync, constants } from "zlib" import { writeFileSync, readFileSync, mkdtempSync, rmSync } from "fs" import { tmpdir } from "os" import { join } from "path" import { execFileSync } from "child_process" import { encode as msgEnc } from "@msgpack/msgpack" import { encode as cborEnc } from "cbor-x" import { enc as encA, ARJSON } from "../src/arjson.js" import { Encoder, encode } from "../src/encoder.js" import { Decoder } from "../src/decoder.js" const BR_OPTS = { params: { [constants.BROTLI_PARAM_QUALITY]: 11, [constants.BROTLI_PARAM_MODE]: constants.BROTLI_MODE_GENERIC, }, } // ─── strmap → dictionary ────────────────────────────────────────────────── // // ARJSON's strmap is { "0": "alice", "1": "admin", ... }. Lay them out as a // canonical byte sequence so brotli/zstd can use them as backreference // source bytes. Concatenate by index order, separated by 0x00. This gives // brotli/zstd a seed of the strings most likely to recur in the encoded // data — keys, common values, etc. function strmapToDict(strmap) { const indices = Object.keys(strmap).map(Number).sort((a, b) => a - b) const parts = indices.map(i => strmap[i] ?? "") return Buffer.from(parts.join("\0"), "utf8") } // Encode a doc and capture its strmap (so we can ship a dictionary alongside). function encodeWithStrmap(json) { const u = new Encoder() co --- test/bench-compress.js (bundled) --- // How close to "minimum bits" does ARJSON+brotli actually get? // // Measures, for each workload (and for the corpus as a whole): // - raw size in each binary format (arjson, msgpack, cbor, json) // - +gzip // - +brotli (max quality) // // The smallest result across all rows is the empirical lower bound for // general-purpose (no-trained-dictionary) compression on this corpus. 
import { encode as msgEnc } from "@msgpack/msgpack" import { encode as cborEnc } from "cbor-x" import { enc as encA } from "../src/arjson.js" import { gzipSync, brotliCompressSync, constants } from "zlib" const W = (await import("./bench-workloads.js")).default const BR_OPTS = { params: { [constants.BROTLI_PARAM_QUALITY]: 11, [constants.BROTLI_PARAM_MODE]: constants.BROTLI_MODE_GENERIC, }, } const sizes = (data) => { const json = JSON.stringify(data) const jsonB = Buffer.from(json, "utf8") const msgB = Buffer.from(msgEnc(data)) const cborB = Buffer.from(cborEnc(data)) const arjB = Buffer.from(encA(data)) return { json: jsonB.length, json_gz: gzipSync(jsonB).length, json_br: brotliCompressSync(jsonB, BR_OPTS).length, msg: msgB.length, msg_gz: gzipSync(msgB).length, msg_br: brotliCompressSync(msgB, BR_OPTS).length, cbor: cborB.length, cbor_gz: gzipSync(cborB).length, cbor_br: brotliCompressSync(cborB, BR_OPTS).length, arj: arjB.length, arj_gz: gzipSync(arjB).length, arj_br: brotliCompressSync(arjB, BR_OPTS).length, } } const pad = (s, n, right = false) => { s = String(s) if (s.length >= n) return s return right ? s + " ".repeat(n - s.length) : " ".repeat(n - s.length) + s } const NW = 28 const VW = 9 console.log() console.log("─".repeat(132)) console.log(" Compression-pipeline size benchmark — bytes per encoded form (lower is better)") console.log("─".repeat(132)) console.log() console.log( pad("workload", NW, true) + " " + [ "json", "json+gz", "json+br", "msg", "msg+gz", "msg+br" --- test/bench-worker.js (bundled) --- // Worker: run encode+decode benchmark for one workload, one library. // Reads JSON {lib, op, data, n} from stdin, writes JSON {ns, size} or {error}. 
import { encode as msgEnc, decode as msgDec } from "@msgpack/msgpack" import { encode as cborEnc, decode as cborDec } from "cbor-x" import { enc as encO, dec as decO } from "../src-orig/arjson.js" import { enc as encN, dec as decN } from "../src/arjson.js" let raw = "" process.stdin.on("data", c => (raw += c)) process.stdin.on("end", () => { const { lib, op, data, n } = JSON.parse(raw) try { let buf = null if (op === "encode") { if (lib === "msgpack") buf = msgEnc(data) else if (lib === "cbor") buf = cborEnc(data) else if (lib === "orig") buf = encO(data) else if (lib === "new") buf = encN(data) else if (lib === "json") buf = JSON.stringify(data) const fn = lib === "msgpack" ? () => msgEnc(data) : lib === "cbor" ? () => cborEnc(data) : lib === "orig" ? () => encO(data) : lib === "new" ? () => encN(data) : () => JSON.stringify(data) const t0 = process.hrtime.bigint() for (let i = 0; i < n; i++) fn() const t1 = process.hrtime.bigint() const size = typeof buf === "string" ? Buffer.byteLength(buf, "utf8") : buf.length process.stdout.write(JSON.stringify({ ns: Number(t1 - t0), size })) } else { // decode const buf = lib === "msgpack" ? msgEnc(data) : lib === "cbor" ? cborEnc(data) : lib === "orig" ? encO(data) : lib === "new" ? encN(data) : JSON.stringify(data) const fn = lib === "msgpack" ? () => msgDec(buf) : lib === "cbor" ? () => cborDec(buf) : lib === "orig" ? () => decO(buf) : lib === "new" ? () => decN(b --- test/bench-workloads.js (bundled) --- // Shared workload corpus for bench scripts. const W = { null_: null, true_: true, int_small: 42, int_neg: -1234567, string_short: "hello", string_med: "The quick brown fox jumps over the lazy dog", string_long: "Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
".repeat(20), float: 3.14159, tiny_obj: { a: 1, b: 2 }, tiny_arr: [1, 2, 3], user_record: { id: 12345, username: "alice", name: "Alice Johnson", email: "alice@example.com", age: 30, active: true, role: "admin", tags: ["staff", "verified"], preferences: { theme: "dark", notifications: true, language: "en" }, }, log_entry: { ts: 1709876543210, level: "info", service: "api-gateway", method: "POST", path: "/v1/users", status: 200, duration_ms: 47.3, user_id: "u_8f3a92", request_id: "r_19283abc", }, config_doc: { server: { host: "0.0.0.0", port: 8080, ssl: true, timeout_ms: 30000 }, database: { host: "db.internal", port: 5432, name: "prod", pool_size: 20 }, cache: { enabled: true, ttl_s: 3600, max_entries: 100000 }, log: { level: "info", outputs: ["stdout", "file"], file: "/var/log/app.log" }, features: { feature_a: true, feature_b: false, feature_c: true }, }, schema_doc: { type: "object", required: ["id", "name"], properties: { id: { type: "string", format: "uuid" }, name: { type: "string", minLength: 1, maxLength: 255 }, age: { type: "integer", minimum: 0, maximum: 150 }, email: { type: "string", format: "email" }, tags: { type: "array", items: { type: "string" } }, meta: { type: "object", additionalProperties: true }, }, }, wide_50: (() => { const o = {} for (let i = 0; i < 50; i++) o[`field_${i}`] = i return o })(), wide_100_mixed: (() =>
