// npm 패키지

arjson

Bit-level JSON encoder + delta-chain protocol (weavepack-json reference implementation). Smaller than MessagePack/CBOR for structured/repetitive JSON; ships per-payload-addressable chains for storing edit histories.

npmjs.com tarball repo

주간

312

월간

813

버전

메인테이너

라이선스

MIT

최초 publish

2025-03-14

publisher

asteroiddao

tarball

1,327,605 B

AUTO-PUBLISHED·1개 버전 인덱싱됨·최근 publish: 2026-05-27

// publisher 캠페인 · by asteroiddao

이 계정에서 catch된 패키지 9건

고립된 catch가 아닙니다. 동일 publisher가 8개의 다른 패키지를 추가로 발행했고, 모두 파이프라인이 catch했습니다 — 일회성이 아닌 조직적 캠페인의 형태. 아래 링크는 각 형제 catch의 분석으로 이동합니다.

// offending code· @0.1.5· 1 file flagged

@0.1.5·2026-05-27·AUTO-PUBLISHED·publisher: asteroiddao

heuristic 75/100

static flags 1

llm skipped

mature-package · has-source-repo · osv-flagged:MAL-2026-5189 · public-github-push

// offending code· 1 file flagged· patterns: 1

--- package/package.json (excerpt) ---
{
  "name": "arjson",
  "version": "0.1.5",
  "description": "Bit-level JSON encoder + delta-chain protocol (weavepack-json reference implementation). Smaller than MessagePack/CBOR for structured/repetitive JSON; ships per-payload-addressable chains for storing edit histories.",
  "keywords": [
    "json",
    "encoding",
    "compression",
    "weavepack",
    "delta",
    "msgpack-alternative",
    "cbor-alternative",
    "bit-packing"
  ],
  "homepage": "https://github.com/weavedb/arjson",
  "repository": {
    "type": "git",
    "url": "https://github.com/weavedb/arjson.git"
  },
  "license": "MIT",
  "type": "module",
  "main": "dist/cjs/index.js",
  "module": "dist/src/index.js",
  "bin": {
    "wpkt-json": "bin/wpkt-json.js"
  },
  "scripts": {
    "build:cjs": "babel src --out-dir dist/cjs --config-file ./.babelrc-cjs",
    "build": "rm -rf dist && npm run build:cjs && cp src -rf dist/esm && cp bin -rf dist/ && node make.js && cp .npmignore dist/",
    "test": "node --test test/test.js test/delta.js test/edge-cases.js test/brackets.test.js test/modular.test.js test/regression.test.js test/extension-gate.test.js test/fuzz.test.js test/golden.test.js test/unit.test.js test/api.test.js test/matrix.test.js test/interface-lock.test.js test/delta-invariants.test.js test/null-profile.test.js test/tensor-profile.test.js test/tensor-half.test.js test/tensor-fp8.test.js test/tensor-region-replace.test.js test/tensor-skip-load.test.js test/tensor-stream.test.js test/wpkt-json-cl

--- bundled output (OSV-MAL flagged — LLM scope expansion) ---

--- test/bench-beat-json-brotli.js (bundled) ---
// Goal: beat json+brotli on raw size by exploiting ARJSON's strmap as a
// deterministic per-document dictionary fed to a downstream compressor.
//
// Pipelines tested:
//   A. json (raw)
//   B. json + brotli (the target to beat — has 120 KB built-in text dict)
//   C. arjson + brotli (our previous baseline)
//   D. arjson + zstd (no dict)
//   E. arjson + zstd with strmap-derived dictionary (the new pipeline)
//   F. arjson + brotli with strmap as raw prefix (cheaper variant)
//
// Workloads: per-document AND a homogeneous corpus of 100 user records,
// since that's where json+brotli was beating us.

import { gzipSync, brotliCompressSync, constants } from "zlib"
import { writeFileSync, readFileSync, mkdtempSync, rmSync } from "fs"
import { tmpdir } from "os"
import { join } from "path"
import { execFileSync } from "child_process"
import { encode as msgEnc } from "@msgpack/msgpack"
import { encode as cborEnc } from "cbor-x"
import { enc as encA, ARJSON } from "../src/arjson.js"
import { Encoder, encode } from "../src/encoder.js"
import { Decoder } from "../src/decoder.js"

// Options object handed to brotliCompressSync: quality parameter 11 in
// generic mode.
const BR_OPTS = {
  params: {
    [constants.BROTLI_PARAM_MODE]: constants.BROTLI_MODE_GENERIC,
    [constants.BROTLI_PARAM_QUALITY]: 11,
  },
}

// ─── strmap → dictionary ──────────────────────────────────────────────────
//
// ARJSON's strmap is { "0": "alice", "1": "admin", ... }. Lay them out as a
// canonical byte sequence so brotli/zstd can use them as backreference
// source bytes. Concatenate by index order, separated by 0x00. This gives
// brotli/zstd a seed of the strings most likely to recur in the encoded
// data — keys, common values, etc.
/**
 * Flatten a strmap into one dictionary buffer.
 *
 * @param {Object} strmap - object whose keys are index-like and whose values
 *   are the strings to include.
 * @returns {Buffer} the values in ascending numeric key order, separated by
 *   a single NUL byte and encoded as UTF-8. Missing (null/undefined) values
 *   contribute an empty string.
 */
function strmapToDict(strmap) {
  const order = Object.keys(strmap).map(key => Number(key))
  order.sort((lo, hi) => lo - hi)

  const pieces = []
  for (const idx of order) {
    const value = strmap[idx]
    pieces.push(value === null || value === undefined ? "" : value)
  }

  return Buffer.from(pieces.join("\0"), "utf8")
}

// Encode a doc and capture its strmap (so we can ship a dictionary alongside).
function encodeWithStrmap(json) {
  const u = new Encoder()
  co

--- test/bench-compress.js (bundled) ---
// How close to "minimum bits" does ARJSON+brotli actually get?
//
// Measures, for each workload (and for the corpus as a whole):
//   - raw size in each binary format (arjson, msgpack, cbor, json)
//   - +gzip
//   - +brotli (max quality)
//
// The smallest result across all rows is the empirical lower bound for
// general-purpose (no-trained-dictionary) compression on this corpus.

import { encode as msgEnc } from "@msgpack/msgpack"
import { encode as cborEnc } from "cbor-x"
import { enc as encA } from "../src/arjson.js"
import { gzipSync, brotliCompressSync, constants } from "zlib"

const W = (await import("./bench-workloads.js")).default

// Options object for every brotliCompressSync call in this script: quality
// parameter 11 (the "+brotli (max quality)" rows), generic mode.
const BR_OPTS = {
  params: {
    [constants.BROTLI_PARAM_MODE]: constants.BROTLI_MODE_GENERIC,
    [constants.BROTLI_PARAM_QUALITY]: 11,
  },
}

/**
 * Measure the encoded size of `data` in each format, raw and compressed.
 *
 * All four encodings (JSON text, msgpack, cbor, arjson) are produced first;
 * each is then measured raw, gzip-compressed, and brotli-compressed with
 * BR_OPTS.
 *
 * @param {*} data - value to encode.
 * @returns {Object} byte counts keyed `<fmt>`, `<fmt>_gz`, `<fmt>_br` for
 *   fmt in json, msg, cbor, arj (in that order).
 */
const sizes = (data) => {
  const encodings = [
    ["json", Buffer.from(JSON.stringify(data), "utf8")],
    ["msg", Buffer.from(msgEnc(data))],
    ["cbor", Buffer.from(cborEnc(data))],
    ["arj", Buffer.from(encA(data))],
  ]

  const report = {}
  for (const [label, bytes] of encodings) {
    report[label] = bytes.length
    report[`${label}_gz`] = gzipSync(bytes).length
    report[`${label}_br`] = brotliCompressSync(bytes, BR_OPTS).length
  }
  return report
}

/**
 * Pad a value with spaces to a minimum width.
 *
 * @param {*} s - value to print; converted with String().
 * @param {number} n - target width; longer strings are returned unchanged.
 * @param {boolean} [right=false] - when true the spaces go on the right
 *   (text is left-aligned); otherwise they go on the left.
 * @returns {string} the padded string.
 */
const pad = (s, n, right = false) => {
  const text = String(s)
  return right ? text.padEnd(n, " ") : text.padStart(n, " ")
}

// Width of the workload-name column in the printed table (the "workload"
// header is padded to it).
const NW = 28
// NOTE(review): presumably the width of each numeric value column — its use
// is not visible in this excerpt; confirm.
const VW = 9

console.log()
console.log("─".repeat(132))
console.log("  Compression-pipeline size benchmark — bytes per encoded form (lower is better)")
console.log("─".repeat(132))
console.log()
console.log(
  pad("workload", NW, true) +
    " " +
    [
      "json", "json+gz", "json+br",
      "msg", "msg+gz", "msg+br"

--- test/bench-worker.js (bundled) ---
// Worker: run encode+decode benchmark for one workload, one library.
// Reads JSON {lib, op, data, n} from stdin, writes JSON {ns, size} or {error}.
import { encode as msgEnc, decode as msgDec } from "@msgpack/msgpack"
import { encode as cborEnc, decode as cborDec } from "cbor-x"
import { enc as encO, dec as decO } from "../src-orig/arjson.js"
import { enc as encN, dec as decN } from "../src/arjson.js"

let raw = ""
process.stdin.on("data", c => (raw += c))
process.stdin.on("end", () => {
  const { lib, op, data, n } = JSON.parse(raw)
  try {
    let buf = null
    if (op === "encode") {
      if (lib === "msgpack") buf = msgEnc(data)
      else if (lib === "cbor") buf = cborEnc(data)
      else if (lib === "orig") buf = encO(data)
      else if (lib === "new") buf = encN(data)
      else if (lib === "json") buf = JSON.stringify(data)
      const fn =
        lib === "msgpack"
          ? () => msgEnc(data)
          : lib === "cbor"
            ? () => cborEnc(data)
            : lib === "orig"
              ? () => encO(data)
              : lib === "new"
                ? () => encN(data)
                : () => JSON.stringify(data)
      const t0 = process.hrtime.bigint()
      for (let i = 0; i < n; i++) fn()
      const t1 = process.hrtime.bigint()
      const size = typeof buf === "string" ? Buffer.byteLength(buf, "utf8") : buf.length
      process.stdout.write(JSON.stringify({ ns: Number(t1 - t0), size }))
    } else {
      // decode
      const buf =
        lib === "msgpack"
          ? msgEnc(data)
          : lib === "cbor"
            ? cborEnc(data)
            : lib === "orig"
              ? encO(data)
              : lib === "new"
                ? encN(data)
                : JSON.stringify(data)
      const fn =
        lib === "msgpack"
          ? () => msgDec(buf)
          : lib === "cbor"
            ? () => cborDec(buf)
            : lib === "orig"
              ? () => decO(buf)
              : lib === "new"
                ? () => decN(b

--- test/bench-workloads.js (bundled) ---
// Shared workload corpus for bench scripts.
const W = {
  null_: null,
  true_: true,
  int_small: 42,
  int_neg: -1234567,
  string_short: "hello",
  string_med: "The quick brown fox jumps over the lazy dog",
  string_long: "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(20),
  float: 3.14159,
  tiny_obj: { a: 1, b: 2 },
  tiny_arr: [1, 2, 3],
  user_record: {
    id: 12345,
    username: "alice",
    name: "Alice Johnson",
    email: "alice@example.com",
    age: 30,
    active: true,
    role: "admin",
    tags: ["staff", "verified"],
    preferences: { theme: "dark", notifications: true, language: "en" },
  },
  log_entry: {
    ts: 1709876543210,
    level: "info",
    service: "api-gateway",
    method: "POST",
    path: "/v1/users",
    status: 200,
    duration_ms: 47.3,
    user_id: "u_8f3a92",
    request_id: "r_19283abc",
  },
  config_doc: {
    server: { host: "0.0.0.0", port: 8080, ssl: true, timeout_ms: 30000 },
    database: { host: "db.internal", port: 5432, name: "prod", pool_size: 20 },
    cache: { enabled: true, ttl_s: 3600, max_entries: 100000 },
    log: { level: "info", outputs: ["stdout", "file"], file: "/var/log/app.log" },
    features: { feature_a: true, feature_b: false, feature_c: true },
  },
  schema_doc: {
    type: "object",
    required: ["id", "name"],
    properties: {
      id: { type: "string", format: "uuid" },
      name: { type: "string", minLength: 1, maxLength: 255 },
      age: { type: "integer", minimum: 0, maximum: 150 },
      email: { type: "string", format: "email" },
      tags: { type: "array", items: { type: "string" } },
      meta: { type: "object", additionalProperties: true },
    },
  },
  wide_50: (() => {
    const o = {}
    for (let i = 0; i < 50; i++) o[`field_${i}`] = i
    return o
  })(),
  wide_100_mixed: (() =>