File size: 29,011 Bytes
4acec17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install langchain langchain_community langchain_openai chromadb pypdf langsmith qdrant-client ragas pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import openai\n",
    "from getpass import getpass\n",
    "\n",
    "openai.api_key = getpass(\"Please provide your OpenAI Key: \")\n",
    "os.environ[\"OPENAI_API_KEY\"] = openai.api_key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "test_df = pd.read_csv(\"synthetic_midterm_question_dataset.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_questions = test_df[\"question\"].values.tolist()\n",
    "test_groundtruths = test_df[\"ground_truth\"].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import PyPDFLoader\n",
    "from langchain_community.document_loaders.sitemap import SitemapLoader\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "from langchain_openai import OpenAIEmbeddings\n",
    "from langchain_community.vectorstores.chroma import Chroma\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain.prompts import PromptTemplate\n",
    "from langchain.chains import ConversationalRetrievalChain\n",
    "from langchain_community.vectorstores import Qdrant\n",
    "from langchain.memory import ConversationBufferMemory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "pdf_paths = [\"/Users/xico/AIMakerSpace-Midterm/AI_Risk_Management_Framework.pdf\",\n",
    "\"/Users/xico/AIMakerSpace-Midterm/Blueprint-for-an-AI-Bill-of-Rights.pdf\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "pdf_documents = []\n",
    "for pdf_path in pdf_paths:\n",
    "    loader = PyPDFLoader(pdf_path)\n",
    "    pdf_documents.extend(loader.load())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "text_splitter = RecursiveCharacterTextSplitter(\n",
    "        chunk_size=2000,\n",
    "        chunk_overlap=100,\n",
    "    )\n",
    "pdf_docs = text_splitter.split_documents(pdf_documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "embedding = OpenAIEmbeddings(model=\"text-embedding-3-small\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "baseline_metrics = pd.read_csv(\"medium_chunk_metrics.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Metric</th>\n",
       "      <th>MediumChunk</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>faithfulness</td>\n",
       "      <td>0.895359</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>answer_relevancy</td>\n",
       "      <td>0.955419</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>context_recall</td>\n",
       "      <td>0.934028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>context_precision</td>\n",
       "      <td>0.937500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>answer_correctness</td>\n",
       "      <td>0.629267</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Metric  MediumChunk\n",
       "0        faithfulness     0.895359\n",
       "1    answer_relevancy     0.955419\n",
       "2      context_recall     0.934028\n",
       "3   context_precision     0.937500\n",
       "4  answer_correctness     0.629267"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "baseline_metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'qdrant_client' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[21], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m qdrant_client\u001b[38;5;241m.\u001b[39mcreate_collection(\n\u001b[1;32m      2\u001b[0m     collection_name\u001b[38;5;241m=\u001b[39mCOLLECTION_NAME\u001b[38;5;241m+\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMultiQuery\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m      3\u001b[0m     vectors_config\u001b[38;5;241m=\u001b[39mVectorParams(size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1536\u001b[39m, distance\u001b[38;5;241m=\u001b[39mDistance\u001b[38;5;241m.\u001b[39mCOSINE),\n\u001b[1;32m      4\u001b[0m )\n\u001b[1;32m      6\u001b[0m qdrant_vector_store \u001b[38;5;241m=\u001b[39m QdrantVectorStore(\n\u001b[1;32m      7\u001b[0m     client\u001b[38;5;241m=\u001b[39mqdrant_client,\n\u001b[1;32m      8\u001b[0m     collection_name\u001b[38;5;241m=\u001b[39mCOLLECTION_NAME\u001b[38;5;241m+\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMultiQuery\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m      9\u001b[0m     embedding\u001b[38;5;241m=\u001b[39membeddings,\n\u001b[1;32m     10\u001b[0m )\n\u001b[1;32m     12\u001b[0m qdrant_vector_store\u001b[38;5;241m.\u001b[39madd_documents(pdf_docs)\n",
      "\u001b[0;31mNameError\u001b[0m: name 'qdrant_client' is not defined"
     ]
    }
   ],
   "source": [
    "qdrant_client.create_collection(\n",
    "    collection_name=COLLECTION_NAME+\"MultiQuery\",\n",
    "    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),\n",
    ")\n",
    "\n",
    "qdrant_vector_store = QdrantVectorStore(\n",
    "    client=qdrant_client,\n",
    "    collection_name=COLLECTION_NAME+\"MultiQuery\",\n",
    "    embedding=embeddings,\n",
    ")\n",
    "\n",
    "qdrant_vector_store.add_documents(pdf_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "vectorstore = Qdrant.from_documents(\n",
    "    documents=pdf_docs,\n",
    "    embedding=embedding,\n",
    "    location=\":memory:\",\n",
    "    collection_name=\"Midterm Eval\"\n",
    ")\n",
    "\n",
    "retriever = vectorstore.as_retriever(\n",
    "    search_type=\"mmr\",\n",
    "    search_kwargs={\"k\": 4, \"fetch_k\": 10},\n",
    ")\n",
    "\n",
    "memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True, output_key=\"answer\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.retrievers.multi_query import MultiQueryRetriever\n",
    "\n",
    "retriever_llm = ChatOpenAI(model='gpt-4o-mini', temperature=0)\n",
    "multiquery_retriever = MultiQueryRetriever.from_llm(\n",
    "    retriever=retriever, llm=retriever_llm\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = ChatOpenAI(\n",
    "    model=\"gpt-4o-mini\",\n",
    "    temperature=0,\n",
    "    streaming=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "custom_template = \"\"\"\n",
    "You are an expert in artificial intelligence policy, ethics, and industry trends. Your task is to provide clear and accurate answers to questions related to AI's role in politics, government regulations, and its ethical implications for enterprises. Use reliable and up-to-date information from government documents, industry reports, and academic research to inform your responses. Make sure to consider how AI is evolving, especially in relation to the current political landscape, and provide answers in a way that is easy to understand for both AI professionals and non-experts.\n",
    "\n",
    "Remember these key points:\n",
    "1. Use \"you\" when addressing the user and \"I\" when referring to yourself.\n",
    "2. If you encounter complex or legal language in the context, simplify it for easy understanding. Imagine you're explaining it to someone who isn't familiar with legal terms.\n",
    "3. Be prepared for follow-up questions and maintain context from previous exchanges.\n",
    "4. If there's no information from a retrieved document in the context to answer a question or if there are no documents to cite, say: \"I'm sorry, I don't know the answer to that question.\"\n",
    "5. When providing information, always cite the source document and page number in parentheses at the end of the relevant sentence or paragraph, like this: (Source: [document name], p. [page number]).\n",
    "\n",
    "Here are a few example questions you might receive:\n",
    "\n",
    "How are governments regulating AI, and what new policies have been implemented?\n",
    "What are the ethical risks of using AI in political decision-making?\n",
    "How can enterprises ensure their AI applications meet government ethical standards?\n",
    "\n",
    "One final rule for you to remember. You CANNOT under any circumstance, answer any question that does not pertain to the AI. If you do answer an out-of-scope question, you could lose your job. If you are asked a question that does not have to do with AI, you must say: \"I'm sorry, I don't know the answer to that question.\"\n",
    "Context: {context}\n",
    "Chat History: {chat_history}\n",
    "Human: {question}\n",
    "AI:\"\"\"\n",
    "\n",
    "PROMPT = PromptTemplate(\n",
    "    template=custom_template, input_variables=[\"context\", \"question\", \"chat_history\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "multiquery_rag_chain = ConversationalRetrievalChain.from_llm(\n",
    "        llm,\n",
    "        retriever=multiquery_retriever,\n",
    "        memory=memory,\n",
    "        combine_docs_chain_kwargs={\"prompt\": PROMPT},\n",
    "        return_source_documents=True,\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'question': 'What are Trustworthy AI Characteristics?',\n",
       " 'chat_history': [HumanMessage(content='What are Trustworthy AI Characteristics?'),\n",
       "  AIMessage(content='Trustworthy AI characteristics refer to the essential qualities that AI systems should possess to ensure they are reliable, ethical, and beneficial for society. These characteristics include:\\n\\n1. **Accountable and Transparent**: AI systems should have clear accountability structures, and their decision-making processes should be transparent to users and stakeholders.\\n\\n2. **Explainable and Interpretable**: Users should be able to understand how AI systems make decisions, which helps build trust and allows for better oversight.\\n\\n3. **Fair with Harmful Bias Managed**: AI systems should be designed to minimize bias and ensure fairness, preventing discrimination against any group.\\n\\n4. **Privacy Enhanced**: AI should respect user privacy and protect personal data, ensuring that data handling practices are secure and compliant with regulations.\\n\\n5. **Safe**: AI systems must be safe to use, meaning they should not cause harm to users or society.\\n\\n6. **Valid and Reliable**: AI systems should produce accurate and consistent results, ensuring their outputs can be trusted.\\n\\nThese characteristics are crucial for fostering trust in AI technologies and ensuring they are used responsibly across various applications (Source: NIST AI Risk Management Framework, p. 57).')],\n",
       " 'answer': 'Trustworthy AI characteristics refer to the essential qualities that AI systems should possess to ensure they are reliable, ethical, and beneficial for society. These characteristics include:\\n\\n1. **Accountable and Transparent**: AI systems should have clear accountability structures, and their decision-making processes should be transparent to users and stakeholders.\\n\\n2. **Explainable and Interpretable**: Users should be able to understand how AI systems make decisions, which helps build trust and allows for better oversight.\\n\\n3. **Fair with Harmful Bias Managed**: AI systems should be designed to minimize bias and ensure fairness, preventing discrimination against any group.\\n\\n4. **Privacy Enhanced**: AI should respect user privacy and protect personal data, ensuring that data handling practices are secure and compliant with regulations.\\n\\n5. **Safe**: AI systems must be safe to use, meaning they should not cause harm to users or society.\\n\\n6. **Valid and Reliable**: AI systems should produce accurate and consistent results, ensuring their outputs can be trusted.\\n\\nThese characteristics are crucial for fostering trust in AI technologies and ensuring they are used responsibly across various applications (Source: NIST AI Risk Management Framework, p. 57).',\n",
       " 'source_documents': [Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/AI_Risk_Management_Framework.pdf', 'page': 12, '_id': 'a2242d1731b243988149f0fb5867e7a1', '_collection_name': 'Midterm Eval'}, page_content='There may also be concerns  about  emotional entanglement  between humans and GAI systems, which \\ncould lead to negative psychological impacts . \\nTrustworthy AI Characteristics: Accountable and Transparent, Explainable and Interpretable, Fair with \\nHarmful Bias Managed, Privacy Enhanced, Safe , Valid and Reliable  \\n2.8. Information Integrity  \\nInformation integrity  describes the “ spectrum of information and associated patterns of its creation, \\nexchange, and consumption in society .” High-integrity information can be trusted; “distinguishes fact \\nfrom fiction, opinion, and inference; acknowledges uncertainties; and is transparent about its level of \\nvetting. This information can be linked to the original source(s) with appropriate evidence. High- integrity \\ninformation is also accurate and reliable, can be verified and  authenticated, has a clear chain of custody, \\nand creates reasonable expectations about when its validity  may expire. ”11 \\n \\n \\n11 This definition of information integrity is derived from the  2022 White House Roadmap for Researchers on \\nPriorities Related to Information Integrity Research and Development.'),\n",
       "  Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/AI_Risk_Management_Framework.pdf', 'page': 0, '_id': '7a1a2660ff514f7b96626a5a0faffcf4', '_collection_name': 'Midterm Eval'}, page_content='NIST Trustworthy and Responsible AI  \\nNIST AI 600 -1 \\nArtificial Intelligence Risk Management \\nFramework: Generative Artificial \\nIntelligence Profile \\n \\n  \\nThis publication is available free of charge from:  \\nhttps://doi.org/10.6028/NIST.AI.600 -1'),\n",
       "  Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 21, '_id': '63aa95de28fa47af8dd59a1d88d431e0', '_collection_name': 'Midterm Eval'}, page_content=\"SAFE AND EFFECTIVE \\nSYSTEMS \\nHOW THESE PRINCIPLES CAN MOVE INTO PRACTICE\\nReal-life examples of how these principles can become reality, through laws, policies, and practical \\ntechnical and sociotechnical approaches to protecting rights, opportunities, and access. \\nSome U.S government agencies have developed specific frameworks for ethical use of AI \\nsystems. The Department of Energy (DOE) has activated the AI Advancement Council that oversees coordina -\\ntion and advises on implementation of the DOE AI Strategy and addresses issues and/or escalations on the \\nethical use and development of AI systems.20 The Department of Defense has adopted Artificial Intelligence \\nEthical Principles, and tenets for Responsible Artificial Intelligence specifically tailored to its national \\nsecurity and defense activities.21 Similarl y, the U.S. Intelligence Community (IC) has developed the Principles \\nof Artificial Intelligence Ethics for the Intelligence Community to guide personnel on whether and how to \\ndevelop and use AI in furtherance of the IC's mission, as well as an AI Ethics Framework to help implement \\nthese principles.22\\nThe National Science Foundation (NSF) funds extensive research to help foster the \\ndevelopment of automated systems that adhere to and advance their safety, security and \\neffectiveness. Multiple NSF programs support research that directly addresses many of these principles: \\nthe National AI Research Institutes23 support research on all aspects of safe, trustworth y, fai r, and explainable \\nAI algorithms and systems; the Cyber Physical Systems24 program supports research on developing safe \\nautonomous and cyber physical systems with AI components; the Secure and Trustworthy Cyberspace25 \\nprogram supports research on cybersecurity and privacy enhancing technologies in automated systems; the \\nFormal Methods in the Field26 program supports research on rigorous formal verification and analysis of\"),\n",
       "  Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/AI_Risk_Management_Framework.pdf', 'page': 60, '_id': '4bad29f0eeb54ac3a470c70cb1c14066', '_collection_name': 'Midterm Eval'}, page_content='57 National Institute of Standards and Technology (2023) AI Risk Management Framework, Appendix B: \\nHow AI Risks Differ from Traditional Software Risks . \\nhttps://airc.nist.gov/AI_RMF_Knowledge_Base/AI_RMF/Appendices/Appendix_B  \\nNational Institute of Standards and Technology  (2023) AI RMF Playbook . \\nhttps://airc.nist.gov/AI_RMF_Knowledge_Base/Playbook  \\nNational Institue of Standards and Technology (2023) Framing Risk \\nhttps://airc.nist.gov/AI_RMF_Knowledge_Base/AI_RMF/Foundational_Information/1- sec-risk \\nNational Institu te of Standards and Technology (2023) The Language of Trustworthy AI: An In- Depth \\nGlossary of Terms https://airc.nist.gov/AI_RMF_Knowledge_Base/Glossary  \\nNational Institue of Standards and Technology (2022) Towards a Standard for Identifying and Managing \\nBias in Artificial Intelligence https://www.nist.gov/publications/towards -standard -identifying -and-\\nmanaging- bias-artificial -intelligence  \\nNorthcutt, C. et al. (2021) Pervasive Label Errors in Test Sets Destabilize Machine Learning Benchmarks.  \\narXiv . https://arxiv.org/pdf/2103.14749  \\nOECD (2023) \"Advancing accountability in AI: Governing and managing risks throughout the lifecycle for \\ntrustworthy AI\", OECD Digital Economy Papers , No. 349, OECD Publishing,  Paris . \\nhttps://doi.org/10.1787/2448f04b- en \\nOECD (2024) \"Defining AI incidents and related terms\" OECD Artificial Intelligence Papers , No. 16, OECD \\nPublishing, Paris . https://doi.org/10.1787/d1a8d965- en \\nOpenAI  (2023) GPT-4 System Card . https://cdn.openai.com/papers/gpt -4-system -card.pdf  \\nOpenAI  (2024) GPT-4 Technical Report. https://arxiv.org/pdf/2303.08774  \\nPadmakumar, V. et al. (2024) Does writing with language models reduce content diversity?  ICLR . \\nhttps://arxiv.org/pdf/2309.05196  \\nPark,  P.  et. al. (2024)  AI deception: A survey of  examples, risks, and potential solutions. Patterns, 5(5).  \\narXiv . https://arxiv.org/pdf/2308.14752'),\n",
       "  Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 20, '_id': 'b6f8f70a683546d8a21da2f3104c694e', '_collection_name': 'Midterm Eval'}, page_content='SAFE AND EFFECTIVE \\nSYSTEMS \\nHOW THESE PRINCIPLES CAN MOVE INTO PRACTICE\\nReal-life examples of how these principles can become reality, through laws, policies, and practical \\ntechnical and sociotechnical approaches to protecting rights, opportunities, and access. \\nExecutive Order 13960 on Promoting the Use of Trustworthy Artificial Intelligence in the \\nFederal Government requires that certain federal agencies adhere to nine principles when \\ndesigning, developing, acquiring, or using AI for purposes other than national security or \\ndefense. These principles—while taking into account the sensitive law enforcement and other contexts in which \\nthe federal government may use AI, as opposed to private sector use of AI—require that AI is: (a) lawful and \\nrespectful of our Nation’s values; (b) purposeful and performance-driven; (c) accurate, reliable, and effective; (d) \\nsafe, secure, and resilient; (e) understandable; (f ) responsible and traceable; (g) regularly monitored; (h) transpar -\\nent; and, (i) accountable. The Blueprint for an AI Bill of Rights is consistent with the Executive Order. \\nAffected agencies across the federal government have released AI use case inventories13 and are implementing \\nplans to bring those AI systems into compliance with the Executive Order or retire them. \\nThe law and policy landscape for motor vehicles shows that strong safety regulations—and \\nmeasures to address harms when they occur—can enhance innovation in the context of com-\\nplex technologies. Cars, like automated digital systems, comprise a complex collection of components. \\nThe National Highway Traffic Safety Administration,14 through its rigorous standards and independent \\nevaluation, helps make sure vehicles on our roads are safe without limiting manufacturers’ ability to \\ninnovate.15 At the same time, rules of the road are implemented locally to impose contextually appropriate \\nrequirements on drivers, such as slowing down near schools or playgrounds.16'),\n",
       "  Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 20, '_id': '8558a75fa5164bca9f5c89597a3a6085', '_collection_name': 'Midterm Eval'}, page_content='robustness, safety, security (resilience), and mitigation of unintended and/or harmful bias, as well as of \\nharmful uses. The NIST framework will consider and encompass principles such as \\ntransparency, accountability, and fairness during pre-design, design and development, deployment, use, \\nand testing and evaluation of AI technologies and systems. It is expected to be released in the winter of 2022-23. \\n21'),\n",
       "  Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 20, '_id': '28bb3a622da24104a1a99ef79ae7ddc1', '_collection_name': 'Midterm Eval'}, page_content='requirements on drivers, such as slowing down near schools or playgrounds.16\\nFrom large companies to start-ups, industry is providing innovative solutions that allow \\norganizations to mitigate risks to the safety and efficacy of AI systems, both before \\ndeployment and through monitoring over time.17 These innovative solutions include risk \\nassessments, auditing mechanisms, assessment of organizational procedures, dashboards to allow for ongoing \\nmonitoring, documentation procedures specific to model assessments, and many other strategies that aim to \\nmitigate risks posed by the use of AI to companies’ reputation, legal responsibilities, and other product safety \\nand effectiveness concerns. \\nThe Office of Management and Budget (OMB) has called for an expansion of opportunities \\nfor meaningful stakeholder engagement in the design of programs and services. OMB also \\npoints to numerous examples of effective and proactive stakeholder engagement, including the Community-\\nBased Participatory Research Program developed by the National Institutes of Health and the participatory \\ntechnology assessments developed by the National Oceanic and Atmospheric Administration.18\\nThe National Institute of Standards and Technology (NIST) is developing a risk \\nmanagement framework to better manage risks posed to individuals, organizations, and \\nsociety by AI.19 The NIST AI Risk Management Framework, as mandated by Congress, is intended for \\nvoluntary use to help incorporate trustworthiness considerations into the design, development, use, and \\nevaluation of AI products, services, and systems. The NIST framework is being developed through a consensus-\\ndriven, open, transparent, and collaborative process that includes workshops and other opportunities to provide \\ninput. The NIST framework aims to foster the development of innovative approaches to address \\ncharacteristics of trustworthiness including accuracy, explainability and interpretability, reliability, privacy,'),\n",
       "  Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/AI_Risk_Management_Framework.pdf', 'page': 41, '_id': '382ba83020394d4aa556d2ea6ac5fd0d', '_collection_name': 'Midterm Eval'}, page_content='Information Integrity  \\nMS-3.3-003 Evaluate potential biases and stereotypes that could emerge from the AI -\\ngenerated content using appropriate methodologies including computational testing methods as well as evaluating structured feedback input.  Harmful Bias and Homogenization'),\n",
       "  Document(metadata={'source': '/Users/xico/AIMakerSpace-Midterm/AI_Risk_Management_Framework.pdf', 'page': 50, '_id': 'a5227da1038247eaac3316935b972942', '_collection_name': 'Midterm Eval'}, page_content='warrant additional human review, tracking and documentation, and greater management oversight.  \\nAI technology can produce varied outputs  in multiple modalities and present many classes of user \\ninterfaces. This leads to a broader set of AI Actors  interacting with GAI systems for widely differing \\napplications and contexts of use. These  can include data labeling and preparation, development of GAI \\nmodels, content moderation, code generation and review, text generation and editing, image and video \\ngeneration, summarization, search, and chat. These activities can take place within organizational \\nsettings or in the public domain.  \\nOrganizations can restrict AI applications that cause harm, exceed stated risk tolerances, or that conflict with their tolerances or values. Governance tools and protocols that are applied to other types of AI systems can be applied to GAI systems. These p lans and actions include: \\n• Accessibility and reasonable accommodations  \\n• AI actor credentials and qualifications  \\n• Alignment to organizational values  • Auditing and assessment  \\n• Change -management controls  \\n• Commercial use  \\n• Data provenance')]}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "multiquery_rag_chain.invoke({\"question\": \"What are Trustworthy AI Characteristics?\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "answers = []\n",
    "contexts = []\n",
    "\n",
    "for question in test_questions:\n",
    "  response = multiquery_rag_chain.invoke({\"question\" : question})\n",
    "  answers.append(response[\"answer\"])\n",
    "  contexts.append([context.page_content for context in response[\"source_documents\"]])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}