/**
 * MorphFoundry.js
 * 軽量な形態素解析エンジンのリファレンス実装
 * Version: 0.1.0
 *
 * ライセンス: Apache License 2.0
 * 辞書データ: UniDic（国立国語研究所, Apache 2.0）など CSV を JSON に変換した外部辞書を想定
 *
 * このライブラリは教育目的で提供しています。
 * 本番システムで利用する場合は正式な辞書データを組み合わせて十分にテストしてください。
 */

/**
 * Dictionary-driven tokenizer that picks the cheapest segmentation of a text.
 *
 * Every token placed on a path costs `token.cost + connectionCost`; the
 * connection cost is a flat value that does not depend on the neighbouring
 * tokens. Characters not covered by any dictionary entry are emitted as
 * single-character unknown tokens costing `unknownCost`.
 */
export class MorphFoundry {
  /**
   * @param {Object} [options]
   * @param {number} [options.connectionCost=500] Flat cost added per token. `0` is honoured.
   * @param {number} [options.unknownCost=9000] Cost of an unknown single-character token. `0` is honoured.
   * @param {number} [options.maxTokenLength=8] Longest surface (in UTF-16 code units) tried during lookup.
   * @param {number} [options.maxCandidatesPerSurface=16] Cap on entries taken per surface and dictionary.
   * @param {number} [options.maxTotalCandidates=64] Cap on entries returned by one lookup.
   * @param {boolean} [options.debug=false] When truthy, the result is printed with `console.table`.
   * @param {Array<function(string): string>} [options.preprocessors] Functions applied to the text in order.
   */
  constructor(options = {}) {
    this.dictionaries = [];
    // A cost of 0 is meaningful, so fall back only when no finite number
    // was supplied (`||` would have discarded an explicit 0).
    this.connectionCost = Number.isFinite(options.connectionCost)
      ? options.connectionCost
      : 500;
    this.unknownCost = Number.isFinite(options.unknownCost)
      ? options.unknownCost
      : 9000;
    this.maxTokenLength = options.maxTokenLength || 8;
    this.maxCandidatesPerSurface = options.maxCandidatesPerSurface || 16;
    this.maxTotalCandidates =
      typeof options.maxTotalCandidates === 'number'
        ? options.maxTotalCandidates
        : 64;
    this.enableDebug = options.debug || false;
    this.preprocessors = options.preprocessors || [];
  }

  /**
   * Loads a JSON dictionary and registers it after the already loaded ones.
   *
   * The payload must be an object with an `entries` array. Each entry needs a
   * `surface`; `reading`, `lemma`, `pos`, `subpos`, `cType`, `cForm` and
   * `cost` are optional and receive defaults. Entries without a surface are
   * skipped. Optional payload fields `label`, `version` and `entryCount`
   * become the dictionary's metadata.
   *
   * @param {string} url URL or file path of the JSON dictionary
   * @returns {Promise<void>}
   * @throws {Error} When `url` is empty or the payload has no `entries` array
   */
  async loadDictionary(url) {
    if (!url) {
      throw new Error('辞書URLまたはファイルパスを指定してください。');
    }

    const payload = await this._loadDictionaryPayload(url);
    // `payload` may legitimately parse to null, so check it before reading fields.
    if (!payload || !Array.isArray(payload.entries)) {
      throw new Error('辞書データが不正です。entries配列が見つかりません。');
    }

    const map = new Map();
    for (const entry of payload.entries) {
      if (!entry || !entry.surface) continue;
      let bucket = map.get(entry.surface);
      if (!bucket) {
        bucket = [];
        map.set(entry.surface, bucket);
      }
      bucket.push({
        surface: entry.surface,
        reading: entry.reading || entry.surface,
        lemma: entry.lemma || entry.surface,
        pos: entry.pos || '名詞',
        subpos: entry.subpos || '*',
        conjugationType: entry.cType || '*',
        conjugationForm: entry.cForm || '*',
        cost: typeof entry.cost === 'number' ? entry.cost : 5000,
      });
    }

    this.dictionaries.push({
      name: payload.label || 'anonymous',
      version: payload.version || 'unknown',
      entryCount:
        typeof payload.entryCount === 'number'
          ? payload.entryCount
          : payload.entries.length,
      map,
    });
  }

  /**
   * Fetches and parses the raw dictionary JSON.
   *
   * Under Node.js, anything that is a `file:` URL or that is not an
   * `http(s):` / `data:` URL is read from the file system; relative paths are
   * resolved against `process.cwd()`. Everything else goes through `fetch`.
   *
   * @param {string} url
   * @returns {Promise<any>} The parsed JSON value
   * @throws {Error} When the HTTP response is not ok
   */
  async _loadDictionaryPayload(url) {
    const isNode =
      typeof process !== 'undefined' &&
      process.versions &&
      process.versions.node;
    const isLikelyFile =
      /^file:/.test(url) || (!/^https?:\/\//i.test(url) && !/^data:/.test(url));

    if (isNode && isLikelyFile) {
      // Imported lazily so the module can still be loaded where node: modules do not exist.
      const { readFile } = await import('node:fs/promises');
      const { fileURLToPath } = await import('node:url');
      const { resolve } = await import('node:path');
      let targetPath = url;
      if (/^file:/.test(url)) {
        targetPath = fileURLToPath(url);
      } else if (!/^[\\/]/.test(url)) {
        targetPath = resolve(process.cwd(), url);
      }
      const jsonText = await readFile(targetPath, 'utf8');
      return JSON.parse(jsonText);
    }

    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`辞書の取得に失敗しました: ${response.status}`);
    }
    return response.json();
  }

  /**
   * Adds one entry to the dictionary named `custom`, creating that
   * dictionary on first use.
   *
   * Conjugation info may be given either as `conjugationType` /
   * `conjugationForm` or with the `cType` / `cForm` keys used by dictionary
   * files. The default cost is 1000.
   *
   * @param {Object} tokenEntry Entry with at least a `surface`
   * @throws {Error} When `tokenEntry` or its `surface` is missing
   */
  addCustomEntry(tokenEntry) {
    if (!tokenEntry || !tokenEntry.surface) {
      throw new Error('surfaceを含むエントリを指定してください。');
    }
    const customDictionary =
      this.dictionaries.find((dic) => dic.name === 'custom') ||
      this._createCustomDictionary();

    const tokens = customDictionary.map.get(tokenEntry.surface) || [];
    tokens.push({
      surface: tokenEntry.surface,
      reading: tokenEntry.reading || tokenEntry.surface,
      lemma: tokenEntry.lemma || tokenEntry.surface,
      pos: tokenEntry.pos || '名詞',
      subpos: tokenEntry.subpos || '*',
      conjugationType: tokenEntry.conjugationType || tokenEntry.cType || '*',
      conjugationForm: tokenEntry.conjugationForm || tokenEntry.cForm || '*',
      cost: typeof tokenEntry.cost === 'number' ? tokenEntry.cost : 1000,
    });
    customDictionary.map.set(tokenEntry.surface, tokens);
    // Keep the metadata in step with the map; otherwise listDictionaries()
    // would keep reporting the initial 0.
    customDictionary.entryCount = (customDictionary.entryCount || 0) + 1;
  }

  /**
   * Tokenizes `text` into the minimum-cost token sequence.
   *
   * @param {string} text
   * @returns {Array<Object>} Tokens in text order; empty for a non-string or empty input
   */
  analyze(text) {
    if (typeof text !== 'string' || text.length === 0) {
      return [];
    }

    const normalized = this._applyPreprocessors(text);
    const lattice = this._buildLattice(normalized);
    return this._backtrack(lattice);
  }

  /**
   * Returns metadata for every registered dictionary.
   *
   * @returns {Array<{name: string, version: string, size: number, entryCount: number}>}
   *   `size` is the number of distinct surfaces; `entryCount` is the recorded entry count.
   */
  listDictionaries() {
    return this.dictionaries.map((dic) => ({
      name: dic.name,
      version: dic.version,
      size: dic.map.size,
      entryCount: dic.entryCount ?? dic.map.size,
    }));
  }

  /**
   * Registers and returns the empty `custom` dictionary.
   *
   * @returns {{name: string, version: string, entryCount: number, map: Map}}
   */
  _createCustomDictionary() {
    const dic = {
      name: 'custom',
      version: 'local',
      entryCount: 0,
      map: new Map(),
    };
    this.dictionaries.push(dic);
    return dic;
  }

  /**
   * Runs the text through every preprocessor in order.
   *
   * A preprocessor that throws, or that returns something other than a
   * string, is skipped with a warning and the text is passed on unchanged.
   *
   * @param {string} text
   * @returns {string}
   */
  _applyPreprocessors(text) {
    return this.preprocessors.reduce((acc, fn) => {
      try {
        const result = fn(acc);
        if (typeof result !== 'string') {
          // The lattice builder indexes into the text, so a non-string must not get through.
          console.warn('前処理が文字列以外を返したため無視しました', result);
          return acc;
        }
        return result;
      } catch (error) {
        console.warn('前処理でエラーが発生しました', error);
        return acc;
      }
    }, text);
  }

  /**
   * Builds the lattice: `nodes[i]` holds the cheapest cell ending at offset
   * `i` (wrapped in an array), or is undefined when no token ends there.
   *
   * Because the connection cost does not depend on the neighbouring tokens,
   * the cheapest way to reach an offset is all that later offsets need, so a
   * single cell per offset is kept. On equal cost the cell produced first wins.
   *
   * @param {string} text
   * @returns {Array<Array<Object>|undefined>}
   */
  _buildLattice(text) {
    const nodes = [];
    nodes[0] = [{ cost: 0, index: -1, token: null, surface: '' }];

    for (let position = 0; position < text.length; position++) {
      // Offsets no token ends at (including the middle of a surrogate pair) are skipped.
      const prevNode = this._pickCheapest(nodes[position]);
      if (!prevNode) {
        continue;
      }

      const candidates = this._lookupDictionaries(text, position);
      if (candidates.length === 0) {
        // Unknown word: cut off one whole code point so that characters
        // outside the BMP are not split into two lone surrogates.
        const char = String.fromCodePoint(text.codePointAt(position));
        candidates.push({
          surface: char,
          reading: char,
          lemma: char,
          pos: this._guessUnknownPos(char),
          subpos: '*',
          conjugationType: '*',
          conjugationForm: '*',
          cost: this.unknownCost,
          isUnknown: true,
        });
      }

      for (const candidate of candidates) {
        const endIndex = position + candidate.surface.length;
        const totalCost = prevNode.cost + (candidate.cost + this.connectionCost);
        const current = nodes[endIndex];
        // Strict comparison: an equally cheap later cell does not displace the earlier one.
        if (!current || totalCost < current[0].cost) {
          nodes[endIndex] = [
            {
              cost: totalCost,
              index: position,
              token: candidate,
              surface: candidate.surface,
              prev: prevNode,
            },
          ];
        }
      }
    }
    return nodes;
  }

  /**
   * Returns the first cell with the lowest cost, or null for an empty or
   * missing list.
   *
   * @param {Array<Object>|undefined} cells
   * @returns {Object|null}
   */
  _pickCheapest(cells) {
    let best = null;
    for (const cell of cells || []) {
      if (!best || cell.cost < best.cost) {
        best = cell;
      }
    }
    return best;
  }

  /**
   * Collects dictionary entries whose surface starts at `position`.
   *
   * Surfaces are tried from shortest to longest (up to `maxTokenLength`),
   * dictionaries in registration order. At most `maxCandidatesPerSurface`
   * entries are taken per surface and dictionary, and the lookup stops as
   * soon as `maxTotalCandidates` entries were collected.
   *
   * @param {string} text
   * @param {number} position
   * @returns {Array<Object>} A fresh array; the entries themselves are shared with the dictionaries
   */
  _lookupDictionaries(text, position) {
    const results = [];
    const remaining = text.slice(position, position + this.maxTokenLength);
    for (let length = 1; length <= remaining.length; length++) {
      const surface = remaining.slice(0, length);
      for (const dictionary of this.dictionaries) {
        const entries = dictionary.map.get(surface);
        if (!entries) continue;
        const limit = Math.min(entries.length, this.maxCandidatesPerSurface);
        for (let i = 0; i < limit; i++) {
          results.push(entries[i]);
          if (results.length >= this.maxTotalCandidates) {
            return results;
          }
        }
      }
    }
    return results;
  }

  /**
   * Guesses a part of speech for an unknown character from its script.
   *
   * @param {string} char
   * @returns {string}
   */
  _guessUnknownPos(char) {
    if (/[ぁ-ん]/.test(char)) return '名詞';
    if (/[ァ-ヴー]/.test(char)) return '名詞';
    // Planes 2 and 3 hold the supplementary CJK ideographs; the `u` flag is
    // required so that they are matched as whole code points.
    if (/[一-龯\u{20000}-\u{3FFFF}]/u.test(char)) return '名詞';
    if (/[a-zA-Z]/.test(char)) return 'アルファベット';
    if (/[0-9]/.test(char)) return '数詞';
    return '記号';
  }

  /**
   * Follows the `prev` links from the cheapest cell at the end of the
   * lattice and returns the tokens in text order.
   *
   * @param {Array<Array<Object>|undefined>} nodes Lattice from `_buildLattice`
   * @returns {Array<Object>} Plain token objects with an `unknown` flag
   */
  _backtrack(nodes) {
    let cursor = this._pickCheapest(nodes[nodes.length - 1]);
    if (!cursor) {
      return [];
    }

    // Collected back to front and reversed once, instead of unshifting per token.
    const sequence = [];
    while (cursor && cursor.token) {
      const { token } = cursor;
      sequence.push({
        surface: token.surface,
        reading: token.reading,
        lemma: token.lemma,
        pos: token.pos,
        subpos: token.subpos,
        conjugationType: token.conjugationType,
        conjugationForm: token.conjugationForm,
        cost: token.cost,
        unknown: Boolean(token.isUnknown),
      });
      cursor = cursor.prev;
    }
    sequence.reverse();

    if (this.enableDebug) {
      console.table(sequence);
    }

    return sequence;
  }
}

/**
 * Factory helper: builds a MorphFoundry and loads its default dictionaries.
 *
 * The dictionaries listed in `options.defaultDictionaries` are loaded one
 * after another, so they are registered in the order given. When that option
 * is not an array, no dictionary is loaded.
 *
 * @param {Object} [options] Options forwarded unchanged to the MorphFoundry constructor
 * @returns {Promise<MorphFoundry>} The instance, after all dictionaries were loaded
 */
export async function createMorphFoundry(options = {}) {
  const foundry = new MorphFoundry(options);
  const sources = options.defaultDictionaries;
  if (!Array.isArray(sources)) {
    return foundry;
  }
  for (const source of sources) {
    // Awaited one at a time on purpose: registration order follows list order.
    await foundry.loadDictionary(source);
  }
  return foundry;
}

/**
 * Analyses a text and renders the tokens as a tab-separated table.
 *
 * The first line is the header row; every following line holds one token
 * (surface, reading, lemma, part of speech, cost, and `yes`/`no` for the
 * unknown flag). Lines are joined with `\n`.
 *
 * @param {MorphFoundry} analyzer Object providing `analyze(text)`
 * @param {string} text
 * @returns {Promise<string>} The table; the function is async, so the string arrives via a promise
 */
export async function analyzeToTable(analyzer, text) {
  const tokens = analyzer.analyze(text);
  const headerRow = ['表層形', '読み', '原形', '品詞', 'コスト', '未知語'].join('\t');
  const tokenRows = Array.from(tokens, (token) => {
    const unknownLabel = token.unknown ? 'yes' : 'no';
    const cells = [
      token.surface,
      token.reading,
      token.lemma,
      token.pos,
      token.cost,
      unknownLabel,
    ];
    return cells.join('\t');
  });
  return [headerRow, ...tokenRows].join('\n');
}

/**
 * Message-based front end for an analyzer.
 *
 * Reads `{ type, payload }` from `event.data` and answers through the global
 * `postMessage`, so it must run where that function exists:
 *
 * - `analyze`: analyses `payload.text` (empty string when absent) and posts
 *   `{ type: 'analyzed', tokens }`.
 * - `addCustom`: passes `payload.entry` to `analyzer.addCustomEntry` and posts
 *   `{ type: 'customAdded', surface }`.
 * - `listDictionary`: posts `{ type: 'dictionary', data }` with the result of
 *   `analyzer.listDictionaries()`.
 *
 * Any other message type, or an event without data, is ignored. Errors thrown
 * by the analyzer are not caught here.
 *
 * @param {MessageEvent} event
 * @param {MorphFoundry} analyzer
 */
export function handleWorkerMessage(event, analyzer) {
  const { type, payload } = event?.data || {};
  if (type === 'analyze') {
    // A message without payload is treated like an empty text instead of crashing.
    const tokens = analyzer.analyze(payload?.text || '');
    postMessage({ type: 'analyzed', tokens });
  } else if (type === 'addCustom') {
    // A missing entry is handed to the analyzer as-is so that its own
    // validation reports the problem.
    const entry = payload?.entry;
    analyzer.addCustomEntry(entry);
    postMessage({ type: 'customAdded', surface: entry.surface });
  } else if (type === 'listDictionary') {
    postMessage({ type: 'dictionary', data: analyzer.listDictionaries() });
  }
}

/**
 * Command-line style entry point: loads one dictionary and analyses a text.
 *
 * Expected arguments: `--dictionary <path-or-url>` followed by the text to
 * analyse; every argument after the dictionary path is joined with single
 * spaces. Example, from a wrapper script that forwards its arguments:
 *
 *   node morphfoundry-cli.mjs --dictionary unidic-lite.json "text to analyse"
 *
 * Dictionary entries are grouped by surface, so several entries may share one
 * surface, and they receive the same defaults as in
 * `MorphFoundry#loadDictionary` (cost 5000, part of speech 名詞, ...).
 * Conjugation info is read from `conjugationType` / `conjugationForm` or from
 * `cType` / `cForm`.
 *
 * @param {string[]} argv Arguments, without the runtime and script name
 * @param {function(string): Promise<{json: function(): Promise<Object>}>} [fetcher]
 *   Fetch-like function used to obtain the dictionary. When it is not a
 *   function, the dictionary is loaded with `MorphFoundry#loadDictionary`.
 * @returns {Promise<Array<Object>>} The tokens of the analysed text
 * @throws {Error} When the dictionary option or the text is missing, the
 *   response reports failure, or the payload has no `entries` array
 */
export async function loadFromCommandLine(argv, fetcher) {
  const args = argv || [];
  const dictionaryFlagIndex = args.indexOf('--dictionary');
  if (dictionaryFlagIndex === -1 || dictionaryFlagIndex === args.length - 1) {
    throw new Error('--dictionary オプションが指定されていません');
  }
  const dictionaryPath = args[dictionaryFlagIndex + 1];
  const text = args.slice(dictionaryFlagIndex + 2).join(' ');
  if (!text) {
    throw new Error('解析するテキストを指定してください');
  }

  const analyzer = new MorphFoundry();
  if (typeof fetcher !== 'function') {
    // No custom fetcher: let the analyzer read the file or URL itself.
    await analyzer.loadDictionary(dictionaryPath);
    return analyzer.analyze(text);
  }

  const response = await fetcher(dictionaryPath);
  // Only an explicit `ok: false` counts as failure, so minimal fetcher stubs
  // that expose nothing but `json()` keep working.
  if (response && response.ok === false) {
    throw new Error(`辞書の取得に失敗しました: ${response.status}`);
  }
  const dictionary = await response.json();
  if (!dictionary || !Array.isArray(dictionary.entries)) {
    throw new Error('辞書データが不正です。entries配列が見つかりません。');
  }

  // Group by surface: building the Map from [surface, [entry]] pairs would
  // let a later homograph overwrite an earlier one.
  const map = new Map();
  for (const entry of dictionary.entries) {
    if (!entry || !entry.surface) continue;
    const token = {
      surface: entry.surface,
      reading: entry.reading || entry.surface,
      lemma: entry.lemma || entry.surface,
      pos: entry.pos || '名詞',
      subpos: entry.subpos || '*',
      conjugationType: entry.conjugationType || entry.cType || '*',
      conjugationForm: entry.conjugationForm || entry.cForm || '*',
      // A missing cost would turn every path cost into NaN.
      cost: typeof entry.cost === 'number' ? entry.cost : 5000,
    };
    const bucket = map.get(entry.surface);
    if (bucket) {
      bucket.push(token);
    } else {
      map.set(entry.surface, [token]);
    }
  }

  analyzer.dictionaries.push({
    name: dictionary.label || 'cli-dictionary',
    version: dictionary.version || 'cli',
    entryCount: dictionary.entries.length,
    map,
  });
  return analyzer.analyze(text);
}

