Open2

Gemini APIのRAG『File Search Tool』✖ GAS ✖ Google Drive でやってみた!

MakiMaki
/**
 * Gemini File Search インポーター(Web/Google Drive両対応)
 * - Drive フォルダ or fileId 指定で一括インポート
 * - Google ドキュメント/スプレッドシート/スライドは自動でエクスポートして取り込み
 * - WebのURL配列も同時に取り込み可能
 * - ストア作成 → 取り込み(長時間Operation)→ 完了待ち → 根拠付きで質問
 *
 * 使い方:
 * 1) APIキーを用意(Script Propertiesに GEMINI_API_KEY、または Secret Manager)
 * 2) CONFIG をあなたの環境に合わせて設定
 * 3) main() を実行
 */

// ========== 設定 ==========
/**
 * Runtime settings for the File Search importer.
 * Edit these values to match your environment before running main().
 */
const CONFIG = {
  // Model resource name used for the generateContent call.
  MODEL: 'models/gemini-2.5-flash',

  // --- Import sources (set only the ones you need) ---
  // Files directly under this folder are imported. main() skips the folder step when this
  // is an empty string or still the 'YOUR_DRIVE_FOLDER_ID' placeholder.
  DRIVE_FOLDER_ID: 'YOUR_DRIVE_FOLDER_ID',
  DRIVE_FILE_IDS: [ /* 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', ... */ ], // individual file IDs (may stay empty)
  WEB_URLS: [
    // Example: the requested PDF
    'https://www.cfa.go.jp/assets/contents/node/basic_page/field_ref_resources/be80930d-51d1-4084-aa3e-b80930646538/5f5881e1/20251014_policies_shussan-kosodate_84.pdf'
  ],

  // --- Question sent to the model ---
  PROMPT: 'この資料群の全体を日本語で要約して。',

  // --- Store ---
  // A new store is created on every run. To reuse a store, save the returned `name` and use it instead.
  STORE_DISPLAY_NAME: 'drive-and-web-import-store',

  // --- Authentication (where the API key is read from) ---
  USE_SECRET_MANAGER: false,
  PROJECT_ID: 'YOUR_GCP_PROJECT_ID', // required when USE_SECRET_MANAGER is true

  // --- Polling ---
  OPERATION_POLL_MAX: 60,           // polling rounds; x 5 s interval = roughly 5 minutes at most
  OPERATION_POLL_INTERVAL_MS: 5000, // pause between polling rounds, in milliseconds

  // --- Drive export formats (Google-native files are downloaded through `export`) ---
  // Documents and slides are exported as PDF, spreadsheets as CSV.
  EXPORT_MIMES: {
    'application/vnd.google-apps.document': 'application/pdf',
    'application/vnd.google-apps.presentation': 'application/pdf',
    'application/vnd.google-apps.spreadsheet': 'text/csv',
  },
};

// ========== エントリーポイント ==========
/**
 * Entry point: collects files from Google Drive and the web, indexes them in a newly
 * created File Search store, then sends CONFIG.PROMPT against that store and logs the
 * raw response.
 *
 * The store is created only after at least one import target has been collected, so a
 * run with nothing to import does not leave an empty store behind.
 *
 * @throws {Error} When no import target could be collected. Errors thrown by the helper
 *     functions (store creation, upload, polling, generation, Drive metadata) propagate.
 */
function main() {
  const apiKey = getGeminiApiKey_();
  Logger.log('Start. model=' + CONFIG.MODEL);

  // HTTP header names are case-insensitive, so do not rely on the exact 'Content-Type' spelling.
  const findHeader = (headers, wanted) => {
    const source = headers || {};
    const match = Object.keys(source).find((k) => k.toLowerCase() === wanted.toLowerCase());
    return match ? source[match] : undefined;
  };

  // 1) Collect the bytes of every import target (Drive + web)
  const blobs = [];

  // 1-1) Google Drive (folder)
  if (CONFIG.DRIVE_FOLDER_ID && CONFIG.DRIVE_FOLDER_ID !== 'YOUR_DRIVE_FOLDER_ID') {
    const files = driveListFilesInFolder_(CONFIG.DRIVE_FOLDER_ID);
    Logger.log(`Drive folder files: ${files.length}`);
    for (const f of files) {
      const b = driveDownloadFileAsBlob_(f);
      if (b) blobs.push(b);
      Utilities.sleep(200); // light rate limiting
    }
  }

  // 1-2) Google Drive (individual file IDs)
  if (CONFIG.DRIVE_FILE_IDS && CONFIG.DRIVE_FILE_IDS.length > 0) {
    for (const fileId of CONFIG.DRIVE_FILE_IDS) {
      const meta = driveGetFileMetadata_(fileId);
      const b = driveDownloadFileAsBlob_(meta);
      if (b) blobs.push(b);
      Utilities.sleep(200);
    }
  }

  // 1-3) Web URLs (a failing URL is logged and skipped, not fatal)
  if (CONFIG.WEB_URLS && CONFIG.WEB_URLS.length > 0) {
    for (const url of CONFIG.WEB_URLS) {
      try {
        const resp = UrlFetchApp.fetch(url);
        if (resp.getResponseCode() === 200) {
          const ct = findHeader(resp.getHeaders(), 'Content-Type') || 'application/octet-stream';
          // Last path segment without query string or fragment.
          const name = url.split('/').pop().split(/[?#]/)[0] || 'download';
          blobs.push({ name, contentType: ct, bytes: resp.getContent() });
        } else {
          Logger.log('Skip (web fetch non-200): ' + url);
        }
      } catch (e) {
        Logger.log('Skip (web fetch error): ' + url + ' ' + e);
      }
      Utilities.sleep(200);
    }
  }

  if (blobs.length === 0) {
    throw new Error('取り込み対象が見つかりません。DRIVE_FOLDER_ID / DRIVE_FILE_IDS / WEB_URLS を確認してください。');
  }
  Logger.log(`Import targets: ${blobs.length}`);

  // 2) Create the File Search store (deferred until there is something to import)
  const store = createFileSearchStore_(apiKey, CONFIG.STORE_DISPLAY_NAME);
  Logger.log('Store created: ' + store.name); // e.g. fileSearchStores/xxxx

  // 3) Upload everything (each upload returns its own operation)
  const operations = [];
  for (const b of blobs) {
    const op = uploadToFileSearchStore_(apiKey, store.name, b.bytes, b.contentType);
    Logger.log(`Upload queued: ${b.name} -> ${op.name}`);
    operations.push(op.name);
    Utilities.sleep(300);
  }

  // 4) Wait for every operation to finish
  waitAllOperationsDone_(apiKey, operations);
  Logger.log('Indexing finished.');

  // 5) Ask the question with File Search enabled
  const answer = askWithFileSearch_(apiKey, store.name, CONFIG.PROMPT);
  Logger.log(JSON.stringify(answer, null, 2));
  Logger.log('Done.');
}

// ========== 認証(APIキー取得) ==========
/**
 * Returns the Gemini API key.
 *
 * Delegates to Secret Manager when CONFIG.USE_SECRET_MANAGER is truthy; otherwise reads
 * the GEMINI_API_KEY script property.
 *
 * @returns {string} The API key with surrounding whitespace removed.
 * @throws {Error} When the script property is missing, empty, or whitespace-only.
 */
function getGeminiApiKey_() {
  if (CONFIG.USE_SECRET_MANAGER) return getGeminiApiKeyFromSecretManager_();
  const raw = PropertiesService.getScriptProperties().getProperty('GEMINI_API_KEY');
  // Trim before validating so a whitespace-only value is rejected here instead of
  // being returned as an empty key.
  const key = raw ? raw.trim() : '';
  if (!key) throw new Error('GEMINI_API_KEY が Script Properties に設定されていません。');
  return key;
}

/**
 * Reads the latest version of the GEMINI_API_KEY secret from Secret Manager, using the
 * script's OAuth token, and returns its decoded value.
 *
 * @returns {string} The decoded secret with surrounding whitespace removed.
 * @throws {Error} When CONFIG.PROJECT_ID is unset or still the placeholder, or when the
 *     request does not return HTTP 200.
 */
function getGeminiApiKeyFromSecretManager_() {
  const projectId = CONFIG.PROJECT_ID;
  const projectIsConfigured = Boolean(projectId) && projectId !== 'YOUR_GCP_PROJECT_ID';
  if (!projectIsConfigured) {
    throw new Error('CONFIG.PROJECT_ID をあなたのGCPプロジェクトIDに設定してください。');
  }

  const endpoint = `https://secretmanager.googleapis.com/v1/projects/${projectId}/secrets/GEMINI_API_KEY/versions/latest:access`;
  const requestOptions = {
    method: 'get',
    headers: { Authorization: `Bearer ${ScriptApp.getOAuthToken()}` },
    muteHttpExceptions: true,
  };
  const response = UrlFetchApp.fetch(endpoint, requestOptions);
  const responseText = response.getContentText();
  if (response.getResponseCode() !== 200) {
    throw new Error('Secret Manager から取得失敗: ' + responseText);
  }

  // The secret value arrives base64-encoded under payload.data.
  const parsed = JSON.parse(responseText);
  const secretBytes = Utilities.base64Decode(parsed.payload.data);
  return Utilities.newBlob(secretBytes).getDataAsString().trim();
}

// ========== File Search: ストア作成 ==========
/**
 * Creates a File Search store through the Generative Language API.
 *
 * @param {string} apiKey - Gemini API key, sent as the `key` query parameter.
 * @param {string} displayName - Display name for the new store.
 * @returns {Object} The parsed JSON response body.
 * @throws {Error} When the response status is outside the 2xx range.
 */
function createFileSearchStore_(apiKey, displayName) {
  const endpoint = `https://generativelanguage.googleapis.com/v1beta/fileSearchStores?key=${encodeURIComponent(apiKey)}`;
  const requestOptions = {
    method: 'post',
    contentType: 'application/json',
    payload: JSON.stringify({ displayName }),
    muteHttpExceptions: true,
  };

  const response = UrlFetchApp.fetch(endpoint, requestOptions);
  const status = response.getResponseCode();
  const text = response.getContentText();

  const failed = status < 200 || status >= 300;
  if (failed) {
    throw new Error(`createFileSearchStore failed: ${status} ${text}`);
  }
  return JSON.parse(text);
}

// ========== File Search: アップロード ==========
/**
 * Uploads raw bytes into a File Search store with a media upload request.
 *
 * @param {string} apiKey - Gemini API key, sent as the `key` query parameter.
 * @param {string} storeName - Store resource name such as "fileSearchStores/xxxx". It is
 *     inserted into the URL path as-is; the slash must not be encoded.
 * @param {*} bytes - Request payload (the file content).
 * @param {string} [contentType] - MIME type of the payload; 'application/octet-stream'
 *     is used when falsy.
 * @returns {Object} The parsed JSON response body (a long-running operation).
 * @throws {Error} When the response status is outside the 2xx range.
 */
function uploadToFileSearchStore_(apiKey, storeName, bytes, contentType) {
  const uploadRoot = 'https://generativelanguage.googleapis.com/upload/v1beta/';
  const query = `uploadType=media&key=${encodeURIComponent(apiKey)}`;
  const endpoint = `${uploadRoot}${storeName}:uploadToFileSearchStore?${query}`;

  const response = UrlFetchApp.fetch(endpoint, {
    method: 'post',
    contentType: contentType ? contentType : 'application/octet-stream',
    payload: bytes,
    muteHttpExceptions: true,
  });

  const status = response.getResponseCode();
  const text = response.getContentText();
  if (status < 200 || status >= 300) {
    throw new Error(`uploadToFileSearchStore failed: ${status} ${text}`);
  }
  return JSON.parse(text); // long-running operation
}

// ========== Operations: 完了待ち(複数) ==========
/**
 * Polls a set of long-running operations until every one reports `done`.
 *
 * Each round queries every still-pending operation once, logs the overall progress, and
 * then sleeps CONFIG.OPERATION_POLL_INTERVAL_MS before the next round. At most
 * CONFIG.OPERATION_POLL_MAX rounds are performed.
 *
 * @param {string} apiKey - Gemini API key, sent as the `key` query parameter.
 * @param {string[]} operationNames - Operation resource names to wait for.
 * @throws {Error} When a status request is not 2xx, when a finished operation carries an
 *     `error`, or when operations are still pending after the last round.
 */
function waitAllOperationsDone_(apiKey, operationNames) {
  const apiRoot = 'https://generativelanguage.googleapis.com/v1beta/';
  const remaining = new Set(operationNames);

  let round = 0;
  while (round < CONFIG.OPERATION_POLL_MAX) {
    // Iterate over a snapshot so finished operations can be removed on the spot.
    for (const opName of Array.from(remaining)) {
      const response = UrlFetchApp.fetch(
        `${apiRoot}${opName}?key=${encodeURIComponent(apiKey)}`,
        { method: 'get', muteHttpExceptions: true }
      );
      const status = response.getResponseCode();
      if (!(status >= 200 && status < 300)) {
        throw new Error(`operations.get failed: ${response.getResponseCode()} ${response.getContentText()}`);
      }

      const operation = JSON.parse(response.getContentText());
      if (operation.done) {
        if (operation.error) {
          throw new Error('Operation error: ' + JSON.stringify(operation.error));
        }
        remaining.delete(opName);
      }
      Utilities.sleep(200);
    }

    Logger.log(`Indexing progress: ${operationNames.length - remaining.size}/${operationNames.length}`);
    if (remaining.size === 0) {
      return;
    }
    Utilities.sleep(CONFIG.OPERATION_POLL_INTERVAL_MS);
    round += 1;
  }

  throw new Error('Operation timeout: インデックス化が制限時間内に完了しませんでした。');
}

// ========== 生成:File Search を使って質問 ==========
/**
 * Sends a single user message to CONFIG.MODEL via generateContent with the File Search
 * tool pointed at the given store.
 *
 * @param {string} apiKey - Gemini API key, sent as the `key` query parameter.
 * @param {string} storeName - File Search store resource name to search.
 * @param {string} userText - The question text.
 * @returns {Object} The parsed JSON response body.
 * @throws {Error} When the response status is outside the 2xx range.
 */
function askWithFileSearch_(apiKey, storeName, userText) {
  const endpoint =
    'https://generativelanguage.googleapis.com/v1beta/' +
    CONFIG.MODEL +
    ':generateContent?key=' +
    encodeURIComponent(apiKey);

  const userMessage = { role: 'user', parts: [{ text: userText }] };
  // The store is specified inside tools[].fileSearch.
  const fileSearchTool = { fileSearch: { fileSearchStoreNames: [storeName] } };
  const requestBody = { contents: [userMessage], tools: [fileSearchTool] };

  const response = UrlFetchApp.fetch(endpoint, {
    method: 'post',
    contentType: 'application/json',
    payload: JSON.stringify(requestBody),
    muteHttpExceptions: true,
  });

  const status = response.getResponseCode();
  const text = response.getContentText();
  if (status < 200 || status >= 300) {
    throw new Error(`generateContent failed: ${status} ${text}`);
  }
  return JSON.parse(text);
}

// ========== Google Drive: ユーティリティ ==========
/**
 * Lists the non-trashed items whose parent is the given folder, using Drive API v3
 * files.list (direct children only; no recursion).
 *
 * All result pages are followed: `nextPageToken` is requested explicitly in the `fields`
 * mask, because a partial-response mask that omits it would stop the listing after the
 * first page.
 *
 * @param {string} folderId - ID of the Drive folder to list.
 * @returns {Object[]} File resources carrying id, name, mimeType and modifiedTime.
 * @throws {Error} When any files.list request does not return HTTP 200.
 */
function driveListFilesInFolder_(folderId) {
  // Escape backslashes and single quotes so the ID cannot break out of the quoted literal.
  const quotedId = String(folderId).replace(/\\/g, '\\\\').replace(/'/g, "\\'");
  const q = `'${quotedId}' in parents and trashed=false`;
  const fields = 'nextPageToken,files(id,name,mimeType,modifiedTime)';
  const token = ScriptApp.getOAuthToken();
  const baseUrl = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(q)}&fields=${encodeURIComponent(fields)}&pageSize=1000`;

  let pageToken = '';
  const out = [];
  do {
    // Page tokens are opaque strings, so they are URL-encoded like any other parameter.
    const url = pageToken ? `${baseUrl}&pageToken=${encodeURIComponent(pageToken)}` : baseUrl;
    const res = UrlFetchApp.fetch(url, {
      method: 'get',
      headers: { Authorization: `Bearer ${token}` },
      muteHttpExceptions: true,
    });
    if (res.getResponseCode() !== 200) {
      throw new Error('Drive files.list 失敗: ' + res.getContentText());
    }
    const data = JSON.parse(res.getContentText());
    if (data.files && data.files.length) out.push(...data.files);
    pageToken = data.nextPageToken || '';
  } while (pageToken);
  return out;
}

/**
 * Fetches id, name, mimeType and modifiedTime for one Drive file via Drive API v3
 * files.get, authenticated with the script's OAuth token.
 *
 * @param {string} fileId - ID of the Drive file.
 * @returns {Object} The parsed file resource.
 * @throws {Error} When the request does not return HTTP 200.
 */
function driveGetFileMetadata_(fileId) {
  const requestOptions = {
    method: 'get',
    headers: { Authorization: `Bearer ${ScriptApp.getOAuthToken()}` },
    muteHttpExceptions: true,
  };
  const endpoint = `https://www.googleapis.com/drive/v3/files/${fileId}?fields=id,name,mimeType,modifiedTime`;

  const response = UrlFetchApp.fetch(endpoint, requestOptions);
  const text = response.getContentText();
  if (response.getResponseCode() === 200) {
    return JSON.parse(text);
  }
  throw new Error('Drive files.get 失敗: ' + text);
}

/**
 * Downloads one Drive file and returns its content with a name and MIME type.
 *
 * Files whose mimeType has an entry in CONFIG.EXPORT_MIMES are fetched through the
 * `export` endpoint in the mapped format, and the matching extension (.pdf / .csv) is
 * appended to the name. Every other file is fetched as-is with `alt=media`.
 *
 * @param {{id: string, name: (string|undefined), mimeType: (string|undefined)}} fileMeta
 *     Drive file resource (as returned by files.list / files.get).
 * @returns {?{name: string, contentType: string, bytes: *}} The downloaded content, or
 *     null when the request does not return HTTP 200 (the failure is logged).
 */
function driveDownloadFileAsBlob_(fileMeta) {
  const token = ScriptApp.getOAuthToken();
  const id = fileMeta.id;
  const name = fileMeta.name || 'download';
  const mime = fileMeta.mimeType;

  // Google-native document types have to be exported.
  if (CONFIG.EXPORT_MIMES[mime]) {
    const exportMime = CONFIG.EXPORT_MIMES[mime];
    const url = `https://www.googleapis.com/drive/v3/files/${id}/export?mimeType=${encodeURIComponent(exportMime)}`;
    const res = UrlFetchApp.fetch(url, {
      method: 'get',
      headers: { Authorization: `Bearer ${token}` },
      muteHttpExceptions: true,
    });
    if (res.getResponseCode() !== 200) {
      Logger.log('Export failed: ' + name + ' ' + res.getContentText());
      return null;
    }
    const bytes = res.getContent();
    const ext = exportMime === 'application/pdf' ? '.pdf'
              : exportMime === 'text/csv' ? '.csv'
              : '';
    return { name: name + ext, contentType: exportMime, bytes };
  }

  // Everything else is fetched as binary content (alt=media).
  const url = `https://www.googleapis.com/drive/v3/files/${id}?alt=media`;
  const res = UrlFetchApp.fetch(url, {
    method: 'get',
    headers: { Authorization: `Bearer ${token}` },
    muteHttpExceptions: true,
  });
  if (res.getResponseCode() !== 200) {
    Logger.log('Download failed: ' + name + ' ' + res.getContentText());
    return null;
  }

  // HTTP header names are case-insensitive, so match 'Content-Type' regardless of casing.
  // When the header is absent, prefer the MIME type Drive reported for the file over the
  // generic octet-stream fallback.
  const headers = res.getHeaders() || {};
  const headerKey = Object.keys(headers).find((k) => k.toLowerCase() === 'content-type');
  const contentType = (headerKey && headers[headerKey]) || mime || 'application/octet-stream';
  return { name, contentType, bytes: res.getContent() };
}