adamkarvonen · amakelov · Nov 8, 2024
diff --git a/evals/ravel/ravel/clean_prototype.ipynb b/evals/ravel/ravel/clean_prototype.ipynb
@@ -0,0 +1,214 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "import os\n",
+    "\n",
+    "REPO_DIR = os.getcwd()\n",
+    "SRC_DIR = os.path.join(REPO_DIR, 'src')\n",
+    "MODEL_DIR = os.path.join(REPO_DIR, 'models')\n",
+    "DATA_DIR = os.path.join(REPO_DIR, 'data')\n",
+    "\n",
+    "# exist_ok=True creates the directories idempotently, without a separate existence check.\n",
+    "for d in [MODEL_DIR, DATA_DIR]:\n",
+    "    os.makedirs(d, exist_ok=True)\n",
+    "\n",
+    "# Make modules in the repo root and src/ importable from this notebook.\n",
+    "import sys\n",
+    "sys.path.append(REPO_DIR)\n",
+    "sys.path.append(SRC_DIR)\n",
+    "\n",
+    "import numpy as np\n",
+    "import random\n",
+    "import torch\n",
+    "import accelerate\n",
+    "from nnsight import NNsight  # required: the model cell calls NNsight(hf_model)\n",
+    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+    "from transformer_lens import HookedTransformer\n",
+    "\n",
+    "def set_seed(seed):  # seed the Python, NumPy and torch (CPU + all CUDA devices) RNGs\n",
+    "    random.seed(seed)\n",
+    "    np.random.seed(seed)\n",
+    "    torch.manual_seed(seed)\n",
+    "    torch.cuda.manual_seed_all(seed)\n",
+    "\n",
+    "set_seed(0)\n",
+    "\n",
+    "device = \"cpu\"\n",
+    "if torch.backends.mps.is_available():\n",
+    "    device = \"mps\"\n",
+    "elif torch.cuda.is_available():\n",
+    "    device = \"cuda\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: '../../auth/hf_token.txt'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[3], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# Load model\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m../../auth/hf_token.txt\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m      5\u001b[0m     hf_token \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mread()\u001b[38;5;241m.\u001b[39mstrip()\n\u001b[1;32m      7\u001b[0m model_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgoogle/gemma-2-2b\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
+      "File \u001b[0;32m~/miniconda3/envs/sae_eval/lib/python3.10/site-packages/IPython/core/interactiveshell.py:324\u001b[0m, in \u001b[0;36m_modified_open\u001b[0;34m(file, *args, **kwargs)\u001b[0m\n\u001b[1;32m    317\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m}:\n\u001b[1;32m    318\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    319\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIPython won\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt let you open fd=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m by default \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    320\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mas it is likely to crash IPython. If you know what you are doing, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    321\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myou can use builtins\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m open.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    322\u001b[0m     )\n\u001b[0;32m--> 324\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mio_open\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../../auth/hf_token.txt'"
+     ]
+    }
+   ],
+   "source": [
+    "# Load model (token: local file first, then the HF_TOKEN environment variable)\n",
+    "try:\n",
+    "    with open('../../auth/hf_token.txt', 'r') as f:\n",
+    "        hf_token = f.read().strip()\n",
+    "except FileNotFoundError:\n",
+    "    hf_token = os.environ.get('HF_TOKEN')  # None -> transformers falls back to the cached login, if any\n",
+    "\n",
+    "model_id = \"google/gemma-2-2b\"\n",
+    "model_name = \"gemma-2-2b\"\n",
+    "\n",
+    "torch.set_grad_enabled(False) # avoid blowing up mem\n",
+    "hf_model = AutoModelForCausalLM.from_pretrained(\n",
+    "    model_id,\n",
+    "    cache_dir=MODEL_DIR,\n",
+    "    token=hf_token,\n",
+    "    device_map=device,\n",
+    "    low_cpu_mem_usage=True,\n",
+    "    attn_implementation=\"eager\"\n",
+    ")\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained(\n",
+    "    model_id,\n",
+    "    cache_dir=MODEL_DIR,\n",
+    "    token=hf_token,\n",
+    ")\n",
+    "tokenizer.pad_token = tokenizer.eos_token\n",
+    "tokenizer.padding_side = 'left'\n",
+    "VOCAB = sorted(tokenizer.vocab, key=tokenizer.vocab.get)\n",
+    "\n",
+    "layer_idx = 10\n",
+    "\n",
+    "nnsight_model = NNsight(hf_model)\n",
+    "nnsight_tracer_kwargs = {'scan': True, 'validate': False, 'use_cache': False, 'output_attentions': False}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Dataset Generation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'tokenizer' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[2], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mravel_dataset_builder\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RAVELEntityPromptData\n\u001b[0;32m----> 3\u001b[0m full_entity_dataset \u001b[38;5;241m=\u001b[39m RAVELEntityPromptData\u001b[38;5;241m.\u001b[39mfrom_files(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcity\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[43mtokenizer\u001b[49m)\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28mlen\u001b[39m(full_entity_dataset)\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'tokenizer' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "from ravel_dataset_builder import RAVELEntityPromptData\n",
+    "\n",
+    "full_entity_dataset = RAVELEntityPromptData.from_files('city', 'data', tokenizer)\n",
+    "len(full_entity_dataset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sampled_entity_dataset = full_entity_dataset.downsample(1000)\n",
+    "print(f\"Number of prompts remaining: {len(sampled_entity_dataset)}\")\n",
+    "\n",
+    "prompt_max_length = 48\n",
+    "sampled_entity_dataset.generate_completions(\n",
+    "    nnsight_model,\n",
+    "    tokenizer,\n",
+    "    max_length=prompt_max_length + 8,\n",
+    "    prompt_max_length=prompt_max_length,\n",
+    ")\n",
+    "sampled_entity_dataset.evaluate_correctness()\n",
+    "correct_data = sampled_entity_dataset.filter_correct()\n",
+    "# NOTE(review): filtered_data is not read by any visible cell; confirm whether it should replace correct_data.\n",
+    "filtered_data = correct_data.filter_top_entities_and_templates(top_n_entities=400, top_n_templates_per_attribute=12)\n",
+    "\n",
+    "accuracy = sampled_entity_dataset.calculate_average_accuracy()\n",
+    "print(f\"Average accuracy: {accuracy:.2%}\")\n",
+    "print(f\"Number of prompts remaining: {len(correct_data)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "correct_data.add_wikipedia_prompts('city', 'data', tokenizer, nnsight_model)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Experimental Interventions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/evals/ravel/ravel/common_imports.py b/evals/ravel/ravel/common_imports.py
@@ -0,0 +1,7 @@
+from transformer_lens import HookedTransformer
+import torch
+import numpy as np
+import pandas as pd
+from torch import Tensor
+import sys
+from typing import List, Tuple, Dict, Any, Union, Literal, Optional
diff --git a/evals/ravel/ravel/data.tgz b/evals/ravel/ravel/data.tgz