import openai

# For GPT-3.5 Turbo, the endpoint is ChatCompletion
openai.ChatCompletion.create(
    # For GPT-3.5 Turbo, the model is gpt-3.5-turbo
    model="gpt-3.5-turbo",
    # The conversation is passed as a list of messages
    messages=[
        {"role": "system", "content": "You are a helpful teacher."},
        {
            "role": "user",
            "content": "Are there other measures than time \
complexity for an algorithm?",
        },
        {
            "role": "assistant",
            "content": "Yes, there are other measures besides time \
complexity for an algorithm, such as space complexity.",
        },
        {"role": "user", "content": "What is it?"},
    ],
)
# Function definition passed to the ChatCompletion endpoint: declares a
# single callable tool, find_product, which takes one required string
# parameter, sql_query.
functions = [
    {
        "name": "find_product",
        "description": "Get a list of products from a sql query",
        "parameters": {
            "type": "object",
            "properties": {
                "sql_query": {
                    "type": "string",
                    "description": "A SQL query",
                }
            },
            "required": ["sql_query"],
        },
    }
]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
# Example question
user_question = "I need the top 2 products where the price is less than 2.00"
messages = [{"role": "user", "content": user_question}]

# Call the ChatCompletion endpoint with the function definitions
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0613", messages=messages, functions=functions
)
response_message = response["choices"][0]["message"]
# Append the model's reply so the conversation can continue
messages.append(response_message)
# Example "function_call" field returned by the model when it decides to call
# find_product; "arguments" is a JSON-encoded string holding the SQL query.
''' "function_call": { "name": "find_product", "arguments": '{\n "sql_query": "SELECT * FROM products \ WHERE price < 2.00 ORDER BY price ASC LIMIT 2"\n}', } '''
# Role prompt for the journalist assistant: write articles from the given
# FACTS, respecting the requested TONE, LENGTH and STYLE.
prompt_role = (
    "You are an assistant for journalists. "
    "Your task is to write articles, based on the FACTS that are given to you. "
    "You should respect the instructions: the TONE, the LENGTH, "
    "and the STYLE"
)
# Test call: two facts, informal tone, ~100 words, blogpost style
print(
    assist_journalist(
        ["The sky is blue", "The grass is green"], "informal", 100, "blogpost"
    )
)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# Generated result (example output of assist_journalist):
# "Hey, everyone! Did you know that the sky is blue and the grass is green? I mean, it's something we see every day and probably take for granted, but it's still pretty amazing if you think about it! The sky appears blue to us because of something called Rayleigh scattering – basically, the molecules in the Earth's atmosphere scatter sunlight in all different directions. Blue light has a shorter wavelength, so it gets scattered more than the other colors in the spectrum. That's why the sky looks blue most of the time! As for the grass being green... that's due to chlorophyll, the pigment that helps plants capture sunlight to make their food. Chlorophyll absorbs red and blue light, but reflects green light, which is why we see plants as green. It's pretty cool how science explains these things we take for granted, don't you think? Next time you're outside, take a moment to appreciate the color palette around you!"
YouTube 视频摘要
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
import openai
# Read the transcript text from a file
with open("transcript.txt", "r") as f:
    transcript = f.read()
# Call the ChatCompletion endpoint with the gpt-3.5-turbo model; the
# transcript is sent as the final user message after the summarize request.
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize the following text"},
        {"role": "assistant", "content": "Yes."},
        {"role": "user", "content": transcript},
    ],
)
# Prompt template with sector, city and size as parameters
f_prompt = """
Role: You are an expert content writer with extensive direct marketing
experience. You have strong writing skills, creativity, adaptability to
different tones and styles, and a deep understanding of audience needs and
preferences for effective direct campaigns.

Context: You have to write a short message in no more than 2 sentences for a
direct marketing campaign to sell a new e-commerce payment service to stores.
The target stores have the following three characteristics:
- The sector of activity: {sector}
- The city where the stores are located: {city}
- The size of the stores: {size}

Task: Write a short message for the direct marketing campaign. Use the skills
defined in your role to write this message! It is important that the message
you create takes into account the product you are selling and the
characteristics of the store you are writing to.
"""
openai.Completion.create(
    # Call the fine-tuned model
    model="davinci:ft-book:direct-marketing-2023-05-01-15-20-35",
    # Prompt using the same "sector, city, size ->" format as the training data
    prompt="Hotel, New York, small ->",
    max_tokens=100,
    temperature=0,
    stop="\n",
)
微调的成本
使用微调模型调用 API 单价会更高一些;以下价格仅供参考,因为 OpenAI 会不断调整价格;
小结
第5章 使用 LangChain 框架和插件增强 LLM 功能
LangChain 框架
LangChain 是一个让开发基于大模型的应用程序变得更加方便的开发框架;它有以下几个核心模块:
Models:提供通用的接口,让调用多个不同厂商的大模型变得更方便;
Prompts:创建和管理提示词
Indexes:对接本地数据,目前已改名为 Retrieval
Chain:用于创建调用序列,以方便组合多个模型或提示词;
Agents:用于处理用户的输入,做出判断,并调用合适的工具完成任务;
Memory:用于存储中间状态,保持上下文;
动态提示词
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Build a program that can answer dynamically supplied questions
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
# Prompt template nudging the model into step-by-step reasoning
template = """Question: {question}

Let's think step by step.

Answer: """
prompt = PromptTemplate(template=template, input_variables=["question"])
llm = ChatOpenAI(model_name="gpt-4")
# LLMChain combines the model and the prompt
llm_chain = LLMChain(prompt=prompt, llm=llm)
question = """What is the population of the capital of the country where the
Olympic Games were held in 2016?"""
llm_chain.run(question)
1 2 3 4 5 6 7 8 9 10
# 得到的输出如下 Step 1: Identify the country where the Olympic Games were held in 2016. Answer: The 2016 Olympic Games were held in Brazil. Step 2: Identify the capital of Brazil. Answer: The capital of Brazil is Brasília. Step 3: Find the population of Brasília. Answer: As of 2021, the estimated population of Brasília is around 3.1 million. So, the population of the capital of the country where the Olympic Games were held in 2016 is around 3.1 million. Note that this is an estimate and may vary slightly.
from langchain.chat_models import ChatOpenAI
from langchain.agents import load_tools, initialize_agent, AgentType
# Build a zero-shot ReAct agent equipped with Wikipedia and a math tool
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
tools = load_tools(["wikipedia", "llm-math"], llm=llm)
agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)
question = """What is the square root of the population of the capital of the
Country where the Olympic Games were held in 2016?"""
agent.run(question)
1 2 3 4 5 6 7 8 9 10
# 计算过程如下
> Entering new chain... I need to find the country where the Olympic Games were held in 2016 and then find the population of its capital city. Then I can take the square root of that population. Action: Wikipedia Action Input: "2016 Summer Olympics" Observation: Page: 2016 Summer Olympics [...]
1 2 3 4 5 6 7 8 9 10 11 12
Thought:I need to search for the capital city of Brazil. Action: Wikipedia Action Input: "Capital of Brazil" Observation: Page: Capitals of Brazil Summary: The current capital of Brazil, since its construction in 1960, is Brasilia. [...] Thought: I have found the capital city of Brazil, which is Brasilia. Now I need to find the population of Brasilia. Action: Wikipedia Action Input: "Population of Brasilia" Observation: Page: Brasilia [...]
1 2 3 4 5 6
Thought: I have found the population of Brasilia, but I need to calculate the square root of that population. Action: Calculator # 此处调用了计算器 Action Input: Square root of the population of Brasilia (population: found in previous observation) Observation: Answer: 1587.051038876822
1 2 3 4 5
# 得出最终的答案 Thought: I now know the final answer Final Answer: The square root of the population of the capital of the country where the Olympic Games were held in 2016 is approximately 1587. > Finished chain.
记忆
记忆之前的状态,相当于记住上下文,这在某些场景中是必需且有用的,例如构建聊天机器人
1 2 3 4 5 6
# LangChain provides a dedicated ConversationChain for chat with memory
from langchain import OpenAI, ConversationChain
# 输出结果如下 > Entering new ConversationChain chain... Prompt after formatting: # 虽然用户只输入了 Hello,但实际上 LangChain 提交给模型的提示词要多很多 The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human: Hello AI: > Finished chain. ' Hello! How can I help you?'
1 2 3 4 5 6 7 8 9 10 11
# 在后续的对话中,LangChain 会将聊天记录提交给模型 > Entering new ConversationChain chain... Prompt after formatting: The following [...] does not know. Current conversation: Human: Hello AI: Hello! How can I help you? Human: Can I ask you a question? Are you an AI? AI: > Finished chain. '\n\nYes, I am an AI.'
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("ExplorersGuide.pdf")
# Load the PDF file and split it into pages
pages = loader.load_and_split()
1 2
from langchain.embeddings import OpenAIEmbeddings

# Embedding model used to convert the split PDF pages into vectors
# (the vectors are stored in a database in a later step — see the
# similarity_search call below)
embeddings = OpenAIEmbeddings()
1 2 3
# When the user asks a question, look up the closest embeddings in the
# database and take the single most relevant document
q = "What is Link's traditional outfit color?"
db.similarity_search(q)[0]
1 2 3 4 5
# Query result: the database returns the page most relevant to the question:
# Document(page_content='''While Link’s traditional green tunic is certainly an iconic look, his wardrobe has expanded [...] Dress for Success''', metadata={'source': 'ExplorersGuide.pdf', 'page': 35} )
1 2 3 4 5 6 7 8 9 10
# 将嵌入整合到聊天机器人中,让聊天机器人可以回答用户的提问
from langchain.chains import RetrievalQA
from langchain import OpenAI
{
  "schema_version": "v1",
  "name_for_human": "TODO Plugin",
  "name_for_model": "todo",
  "description_for_human": "Plugin for managing a TODO list. You can add, remove and view your TODOs.",
  "description_for_model": "Plugin for managing a TODO list. You can add, remove and view your TODOs.",
  "auth": {
    "type": "none"
  },
  "api": {
    "type": "openapi",
    "url": "http://localhost:3333/openapi.yaml",
    "is_user_authenticated": false
  },
  "logo_url": "http://localhost:3333/logo.png",
  "contact_email": "support@thecompany.com",
  "legal_info_url": "http://thecompany-url/legal"
}