Parser

Markdown ファイルから TypeScript コードブロックを抽出するモジュールです。

型定義

コードを保持する ChunkDict と、位置情報付きの ChunkInfo を定義します。

/**
 * Dictionary of extracted code, keyed by chunk identifier.
 *
 * As filled by extractChunks in this file, the key is either the name taken
 * from the code block's meta string or the path given by a preceding
 * file directive comment, and the value is the accumulated code text.
 */
export interface ChunkDict {
  // Key: chunk identifier; value: the code stored under that identifier.
  [name: string]: string;
}

/**
 * A chunk's code together with its position information.
 *
 * As filled by extractChunkInfos in this file, start and end are offsets
 * into the Markdown source string.
 */
export interface ChunkInfo {
  /** Code text of the chunk (blocks sharing a key are joined with a newline). */
  code: string;
  /** Offset at which the code of the first block for this key begins. */
  start: number;
  /** Offset just past the code of the most recently added block for this key. */
  end: number;
}

buildAst: AST の構築

Markdown 文字列から mdast の AST を生成します。

import remarkParse from 'remark-parse';
import { unified } from 'unified';
import type { Root } from 'mdast';

/**
 * Parses a Markdown string into an mdast syntax tree.
 *
 * @param markdown - Markdown source text to parse.
 * @returns The tree produced by a unified processor configured with
 *   remark-parse, typed as an mdast Root.
 */
export function buildAst(markdown: string): Root {
  const processor = unified().use(remarkParse);
  const tree = processor.parse(markdown);
  return tree as Root;
}

extractChunks: チャンク抽出処理

生成した AST を走査し、名前(meta)が付いた ts/tsx コードブロックを ChunkDict に蓄積します。`<!-- file: パス -->` コメントの後に最初に現れる名前付きブロックは、名前の代わりにそのパスをキーとして使います。同じキーのブロックは改行で連結されます。

import type { Code, Html, Root } from 'mdast';
import { visit } from 'unist-util-visit';
import { extIsTs } from './utils.ts.md';
import type { ChunkDict } from ':types';

/**
 * Collects named ts/tsx code blocks from an mdast tree into a ChunkDict.
 *
 * The tree is walked in document order. An HTML comment of the form
 * "<!-- file: some/path -->" is remembered and used as the key of the next
 * named ts/tsx block instead of that block's own name; the remembered path
 * is cleared once it has been used. Blocks without a name in their meta
 * string are skipped. Blocks that resolve to the same key are joined with a
 * newline in the order they appear.
 *
 * @param tree - mdast root to traverse.
 * @param markdown - Original Markdown text. Not read by this function; kept
 *   so the signature stays parallel to extractChunkInfos.
 * @returns Mapping from chunk key to the concatenated code.
 */
export function extractChunks(tree: Root, markdown: string): ChunkDict {
  const dict: ChunkDict = {};
  // No "g" flag, so the regex carries no state between exec calls and can be
  // created once instead of on every visited node.
  const fileDirective = /<!--\s*file:\s*([^>]+)-->/;
  let pendingFile: string | null = null;

  visit(tree, (node) => {
    if (node.type === 'html') {
      const match = fileDirective.exec((node as Html).value ?? '');
      if (match) {
        pendingFile = match[1].trim();
      }
      return;
    }
    if (node.type !== 'code') return;

    const block = node as Code;
    if (!extIsTs(block.lang ?? '')) return;
    const name = (block.meta ?? '').trim();
    if (!name) return;

    const key = pendingFile ?? name;
    // Test for an own property rather than truthiness: a plain object
    // inherits members such as "constructor" and "toString", which would
    // otherwise be mistaken for an existing chunk and prepended to the code.
    // This also keeps the newline join when the first block was empty.
    dict[key] = Object.prototype.hasOwnProperty.call(dict, key)
      ? `${dict[key]}\n${block.value}`
      : block.value;
    pendingFile = null;
  });

  return dict;
}

parseChunks: コードチャンクの抽出

remark-parse で AST を構築し、ts/tsx コードブロックを ChunkDict にまとめます。

import type { Root } from 'mdast';
import type { ChunkDict } from ':types';
import { buildAst } from ':buildAst';
import { extractChunks } from ':extractChunks';

/**
 * Extracts named ts/tsx chunks from a Markdown document.
 *
 * Builds the syntax tree with buildAst and hands it to extractChunks.
 *
 * @param markdown - Markdown source text.
 * @param uri - Location of the document. Not read by this function.
 * @returns Mapping from chunk key to code.
 */
export function parseChunks(markdown: string, uri: string): ChunkDict {
  return extractChunks(buildAst(markdown), markdown);
}

extractChunkInfos: 位置情報付き抽出処理

AST を走査して各チャンクの開始位置と終了位置を記録します。

import type { Code, Html, Root } from 'mdast';
import { visit } from 'unist-util-visit';
import { extIsTs } from './utils.ts.md';
import type { ChunkInfo } from ':types';

/**
 * Collects named ts/tsx code blocks together with their offsets in the
 * Markdown source.
 *
 * The tree is walked in document order. An HTML comment of the form
 * "<!-- file: some/path -->" is remembered and used as the key of the next
 * named ts/tsx block instead of that block's own name; the remembered path
 * is cleared once it has been used. Blocks without a name in their meta
 * string are skipped.
 *
 * For each block the code's offset is found by searching for the node's
 * value inside the slice of the source covered by the node; if it is not
 * found, the node's own start offset is used. When several blocks resolve
 * to the same key their code is joined with a newline, start stays at the
 * first block, and end moves to the end of the latest block.
 *
 * @param tree - mdast root to traverse.
 * @param markdown - Original Markdown text the tree was parsed from.
 * @returns Mapping from chunk key to its code and offsets.
 */
export function extractChunkInfos(
  tree: Root,
  markdown: string,
): Record<string, ChunkInfo> {
  const dict: Record<string, ChunkInfo> = {};
  // No "g" flag, so the regex carries no state between exec calls and can be
  // created once instead of on every visited node.
  const fileDirective = /<!--\s*file:\s*([^>]+)-->/;
  let pendingFile: string | null = null;

  visit(tree, (node) => {
    if (node.type === 'html') {
      const match = fileDirective.exec((node as Html).value ?? '');
      if (match) {
        pendingFile = match[1].trim();
      }
      return;
    }
    if (node.type !== 'code') return;

    const block = node as Code;
    if (!extIsTs(block.lang ?? '')) return;
    const name = (block.meta ?? '').trim();
    if (!name) return;

    const key = pendingFile ?? name;
    const code = block.value;

    // The node span includes the fence lines, so locate the code inside it.
    const pos = block.position;
    const start = pos?.start.offset ?? 0;
    const end = pos?.end.offset ?? start + code.length;
    const idx = markdown.slice(start, end).indexOf(code);
    const codeStart = idx === -1 ? start : start + idx;
    const codeEnd = codeStart + code.length;

    // Look only at own properties: a plain object inherits members such as
    // "constructor", and treating those as an existing entry would write
    // code/end onto a built-in function instead of storing the chunk.
    const existing = Object.prototype.hasOwnProperty.call(dict, key)
      ? dict[key]
      : undefined;
    if (existing) {
      existing.code += `\n${code}`;
      existing.end = codeEnd;
    } else {
      dict[key] = { code, start: codeStart, end: codeEnd };
    }
    pendingFile = null;
  });

  return dict;
}

parseChunkInfos: 位置情報付き抽出

コードブロックの開始位置と終了位置を含めて取得するバリエーションです。

import type { Root } from 'mdast';
import type { ChunkInfo } from ':types';
import { buildAst } from ':buildAst';
import { extractChunkInfos } from ':extractChunkInfos';

/**
 * Extracts named ts/tsx chunks from a Markdown document along with their
 * offsets.
 *
 * Builds the syntax tree with buildAst and hands it to extractChunkInfos.
 *
 * @param markdown - Markdown source text.
 * @param uri - Location of the document. Not read by this function.
 * @returns Mapping from chunk key to its code and offsets.
 */
export function parseChunkInfos(
  markdown: string,
  uri: string,
): Record<string, ChunkInfo> {
  return extractChunkInfos(buildAst(markdown), markdown);
}

公開インタフェース

export { parseChunks } from ':parseChunks';
export { parseChunkInfos } from ':parseChunkInfos';
export type { ChunkDict, ChunkInfo } from ':types';

// Loads the test chunk only when import.meta.vitest is truthy.
// NOTE(review): presumably this is set by Vitest's in-source testing mode,
// so the tests are skipped in normal use — confirm against the build config.
if (import.meta.vitest) {
  await import(':parser.test');
}

Tests

import { describe, expect, it } from 'vitest';
import fs from 'node:fs/promises';
import path from 'node:path';
import { parseChunks } from ':parseChunks';
import { parseChunkInfos } from ':parseChunkInfos';

// Checks that blocks sharing a name are merged under one key and that a
// file directive comment replaces the block name as the key.
describe('parseChunks', () => {
  // Built at runtime so the source never contains a literal code fence.
  const fence = '`'.repeat(3);
  const lines = [
    '# Title',
    '',
    fence + 'ts foo',
    'console.log(1)',
    fence,
    '',
    '<!-- file: path/to/bar.ts -->',
    fence + 'ts bar',
    'console.log(2)',
    fence,
    '',
    fence + 'ts foo',
    'console.log(3)',
    fence,
  ];
  const md = lines.join('\n');

  const chunks = parseChunks(md, '/doc.ts.md');
  it('extracts named chunks', () => {
    const keys = Object.keys(chunks);
    expect(keys).toEqual(['foo', 'path/to/bar.ts']);

    const fooLines = chunks.foo.trim().split('\n');
    expect(fooLines.length).toBe(2);
  });
});

// Checks that offsets are recorded and that merged and file-directive
// chunks carry the expected code.
describe('parseChunkInfos', () => {
  // Built at runtime so the source never contains a literal code fence.
  const fence = '`'.repeat(3);
  const lines = [
    '# Title',
    '',
    fence + 'ts foo',
    'console.log(1)',
    fence,
    '',
    '<!-- file: path/to/bar.ts -->',
    fence + 'ts bar',
    'console.log(2)',
    fence,
    '',
    fence + 'ts foo',
    'console.log(3)',
    fence,
  ];
  const md = lines.join('\n');

  const infos = parseChunkInfos(md, '/doc.ts.md');
  it('includes start and end offsets', () => {
    const foo = infos.foo;
    expect(foo.start).toBeLessThan(foo.end);
    expect(foo.code).toContain('console.log(3)');

    const bar = infos['path/to/bar.ts'];
    expect(bar.code).toContain('console.log(2)');
  });
});

// Parses the fixture document from disk and checks the content of its
// "main" chunk.
describe('parseChunks with fixture', () => {
  it('parses doc fixture', async () => {
    // Resolved against the current working directory.
    const fixturePath = path.join(
      process.cwd(),
      'test',
      'fixtures',
      'doc.ts.md',
    );
    const source = await fs.readFile(fixturePath, 'utf8');
    const chunks = parseChunks(source, fixturePath);
    expect(chunks.main).toContain("import './dep.ts.md'");
  });
});