Building a personal research assistant with AgentScope.
This tutorial guides you from a basic conversational agent, progressively
adding tools, memory, and a knowledge base, to build a fully functional
personal research assistant.Imagine you are a computer science engineer with a keen interest in
the development of artificial intelligence — especially large language
models and the Transformer architecture. You want your assistant to:
Search for the latest academic papers and technical news
Perform Q&A based on downloaded paper PDFs
Remember your research interests and preferences, and proactively
leverage this information in future conversations
You will learn the following core features step by step:
# Install AgentScope (including qdrant dependencies for RAG)pip install 'agentscope[rag]'# If you need long-term memorypip install 'agentscope[reme]'# Set API Keysexport DASHSCOPE_API_KEY=your_dashscope_api_key# If you need Tavily search (optional)export TAVILY_API_KEY=your_tavily_api_key
The mock sections in Part 1 and Part 2 only require DASHSCOPE_API_KEY to run.
import asyncioimport jsonimport osimport subprocessimport urllib.requestfrom datetime import datetimefrom agentscope.agent import ReActAgentfrom agentscope.formatter import DashScopeChatFormatterfrom agentscope.memory import InMemoryMemoryfrom agentscope.message import Msg, TextBlockfrom agentscope.model import DashScopeChatModelfrom agentscope.tool import Toolkit, ToolResponse, execute_python_codeapi_key = os.environ.get("DASHSCOPE_API_KEY", "")def download_file(url: str, path: str) -> None: """Download a file, falling back to curl if urllib fails.""" try: urllib.request.urlretrieve(url, path) except Exception: # (Bad file descriptor). Use curl as a robust fallback. subprocess.run( ["curl", "-fSL", "-o", path, url], check=True, )# ------------------------------------------------------------------ ## Pre-download: fetch the paper PDF before any asyncio.run() calls, ## because repeated event-loop creation can corrupt urllib's sockets. ## ------------------------------------------------------------------ #paper_url = "https://arxiv.org/pdf/1706.03762"paper_path = os.path.join( os.path.dirname(__file__), "attention_is_all_you_need.pdf",)if not os.path.exists(paper_path): print("Downloading paper PDF (required for Part 4)...") download_file(paper_url, paper_path) print(f"Download complete: {paper_path}")else: print(f"Paper already exists: {paper_path}")
Let’s start by creating a basic research assistant. Note that the
sys_prompt includes a user profile.
model = DashScopeChatModel( model_name="qwen-max", api_key=api_key, stream=True,)assistant = ReActAgent( name="ResearchAssistant", sys_prompt=( "You are a professional research assistant. " "The user is particularly interested in AI development, " "especially the progress of large language models. " "Your answers should be accurate and well-organized." ), model=model, formatter=DashScopeChatFormatter(), memory=InMemoryMemory(), toolkit=Toolkit(),)
async def demo_basic_chat() -> None: """Basic conversation: the agent responds to the user's introduction.""" response = await assistant( Msg( "User", "Hello! I'm a computer engineer who has recently become very " "interested in large language model technologies. " "What can you help me with?", "user", ), ) print("Assistant:", response.get_text_content())asyncio.run(demo_basic_chat())
Tools are just ordinary Python functions that return a ToolResponse
object. AgentScope automatically extracts the JSON Schema from the
function signature and docstring.
def search_papers(query: str, limit: int = 5) -> ToolResponse: """Search for academic papers. Args: query (str): Search keywords, e.g. "transformer attention mechanism" limit (int): Number of results to return, default is 5 """ mock_papers = [ { "title": "Attention Is All You Need", "authors": "Vaswani et al.", "year": 2017, "citations": 120000, "summary": "Proposes the Transformer architecture, replacing " "RNN/CNN with Self-Attention", }, { "title": "BERT: Pre-training of Deep Bidirectional Transformers", "authors": "Devlin et al.", "year": 2018, "citations": 95000, "summary": "A pre-trained language model based on the " "Transformer Encoder", }, { "title": "Language Models are Few-Shot Learners (GPT-3)", "authors": "Brown et al.", "year": 2020, "citations": 35000, "summary": "Demonstrates the few-shot learning capabilities " "of large-scale language models", }, ] result = ( f"Found {len(mock_papers)} papers related to \"{query}\":\n\n" ) for i, p in enumerate(mock_papers[:limit], 1): result += ( f"{i}. {p['title']} ({p['authors']}, {p['year']})\n" f" Citations: {p['citations']} | Summary: {p['summary']}\n\n" ) return ToolResponse(content=[TextBlock(type="text", text=result)])def take_notes(title: str, content: str) -> ToolResponse: """Save research notes to a local file. Args: title (str): Note title content (str): Note content """ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M") with open("research_notes.md", "a", encoding="utf-8") as f: f.write(f"## [{timestamp}] {title}\n\n{content}\n\n---\n\n") return ToolResponse( content=[TextBlock(type="text", text=f"Note saved: {title}")], )
In addition to custom tools, AgentScope provides built-in tool functions
such as execute_python_code that can be registered directly.
toolkit = Toolkit()toolkit.register_tool_function(search_papers)toolkit.register_tool_function(execute_python_code)toolkit.register_tool_function(take_notes)print("Registered tools:")for name in toolkit.tools: print(f" - {name}")print("\nTool JSON Schema:")print(json.dumps(toolkit.get_json_schemas(), indent=2, ensure_ascii=False))
Create a tool-equipped agent and test it.
research_agent = ReActAgent( name="ResearchAssistant", sys_prompt=( "You are a research assistant serving a computer engineer " "interested in AI and Transformer technologies. " "When the user's question requires searching papers or computation, " "proactively use the appropriate tools. " "Use the take_notes tool to record important information." ), model=model, formatter=DashScopeChatFormatter(), memory=InMemoryMemory(), toolkit=toolkit,)async def demo_mock_tools() -> None: """Demonstrate the agent with mock tools.""" # The agent will automatically decide to call search_papers await research_agent( Msg("User", "Help me search for papers on the Transformer " "architecture", "user"), ) # The agent will call execute_python_code await research_agent( Msg( "User", "Please calculate the parameter count of BERT-base: " "12 layers * 768 dims * 768 dims * 4, and return the result", "user", ), ) # The agent will call take_notes await research_agent( Msg( "User", "Please make a note: The core innovation of Transformer is " "the Self-Attention mechanism, which enables parallel " "processing of sequential data", "user", ), )asyncio.run(demo_mock_tools())
2.2 Connecting to Real Search — Tavily MCP (Optional)
MCP (Model Context Protocol) is a
standardized tool protocol. AgentScope natively supports MCP, enabling
plug-and-play integration with various external services.Tavily is a search API designed for AI agents,
providing an MCP Server. Once connected, the agent gains real internet
search capabilities.
Note: This code requires setting the TAVILY_API_KEY environment
variable and installing Node.js (for npx to launch the Tavily
MCP Server). If you don’t have these set up yet, you can skip this
section and continue using the mock tools from Part 2.1.
from agentscope.mcp import StdIOStatefulClientasync def demo_tavily_mcp(): tavily_client = StdIOStatefulClient( name="tavily", command="npx", args=["-y", "tavily-mcp@latest"], env={"TAVILY_API_KEY": os.environ["TAVILY_API_KEY"]}, ) await tavily_client.connect() mcp_toolkit = Toolkit() await mcp_toolkit.register_mcp_client(tavily_client) mcp_toolkit.register_tool_function(execute_python_code) mcp_toolkit.register_tool_function(take_notes) mcp_agent = ReActAgent( name="ResearchAssistant", sys_prompt="You are a research assistant that uses search " "tools to get the latest information.", model=model, formatter=DashScopeChatFormatter(), memory=InMemoryMemory(), toolkit=mcp_toolkit, ) await mcp_agent( Msg("User", "Search for the latest improvements to the " "Transformer architecture in 2025", "user"), ) await tavily_client.close()asyncio.run(demo_tavily_mcp())
Memory enables the agent to not only remember “what was just said”
(short-term memory), but also to remember “who you are” across
sessions (long-term memory).
InMemoryMemory is the most basic memory component.
memory = InMemoryMemory()async def demo_short_term_memory() -> None: """Short-term memory: add, retrieve, and check size.""" await memory.add( Msg("User", "I'm a computer engineer, mainly working on Agent " "application development", "user"), ) await memory.add( Msg("Assistant", "Hello! Agent development involves many " "interesting technologies.", "assistant"), ) await memory.add( Msg("User", "Recently I've become very interested in large " "language models and the Transformer architecture", "user"), ) await memory.add( Msg( "Assistant", "Transformer is one of the most important architectures " "in the AI field today.", "assistant", ), ) messages = await memory.get_memory() print(f"Memory contains {len(messages)} messages") size = await memory.size() print(f"Memory size: {size}")asyncio.run(demo_short_term_memory())
Short-term memory only persists within a single session.
ReMePersonalLongTermMemory, based on the
ReMe framework, can store
and retrieve user profile information across sessions.
Note: Using long-term memory requires an additional installation:
pip install 'agentscope[reme]'.
The code below demonstrates the workflow of recording and retrieving
user profiles. We write user background information (computer engineer,
interested in AI) into long-term memory, which the agent can retrieve
at any time.
from agentscope.memory import ReMePersonalLongTermMemoryfrom agentscope.embedding import DashScopeTextEmbeddinglong_term_memory = ReMePersonalLongTermMemory( agent_name="ResearchAssistant", user_name="researcher", model=DashScopeChatModel( model_name="qwen-max", api_key=os.environ["DASHSCOPE_API_KEY"], stream=False, ), embedding_model=DashScopeTextEmbedding( model_name="text-embedding-v4", api_key=os.environ["DASHSCOPE_API_KEY"], dimensions=1024, ), vector_store_dir="./memory_store",)async def demo_long_term_memory(): async with long_term_memory: # Record user profile await long_term_memory.record_to_memory( thinking="Record the user's background and research " "interests", content=[ "User is a computer science engineer working on " "LLM Agent development", "User is very interested in large language models " "and the Transformer architecture", "User is studying the Attention Is All You Need paper", "User prefers using Python for technical experiments", ], ) # Retrieve result = await long_term_memory.retrieve_from_memory( keywords=["research interests", "technical background"], limit=3, ) for block in result.content: print(block["text"])
When integrating with an agent, the recommended approach is to use
agent_control mode, allowing the agent to manage memory
autonomously:
agent = ReActAgent( name="ResearchAssistant", sys_prompt="You are a research assistant with long-term memory " "capabilities. ...", model=model, formatter=DashScopeChatFormatter(), memory=InMemoryMemory(), toolkit=toolkit, long_term_memory=long_term_memory, long_term_memory_mode="agent_control",)
Part 4: Giving the Assistant “Knowledge” — RAG Integration
Retrieval-Augmented Generation (RAG) enables the
agent to retrieve information from documents you provide, rather than
relying solely on knowledge from its training data.
Register knowledge retrieval as a tool, letting the agent autonomously
decide when to query the knowledge base.
Tip: An alternative approach is Generic RAG — pass knowledge
directly to ReActAgent’s knowledge parameter for automatic
retrieval each turn. Agentic RAG offers more flexibility, while
Generic RAG is simpler.
async def demo_agentic_rag() -> None: """Agentic RAG: the agent uses the retrieve_knowledge tool.""" rag_toolkit = Toolkit() rag_toolkit.register_tool_function(search_papers) rag_toolkit.register_tool_function(execute_python_code) rag_toolkit.register_tool_function( knowledge.retrieve_knowledge, func_description=( "Retrieve relevant information from the Transformer paper " "knowledge base. Use this when the user asks about technical " "details such as Transformer architecture, Self-Attention, " "Multi-Head Attention, etc." ), ) rag_agent = ReActAgent( name="ResearchAssistant", sys_prompt=( "You are a research assistant serving a computer engineer " "interested in AI. " "You have a knowledge base built from the Transformer paper. " "When the user asks about technical details, first use " "retrieve_knowledge to search the paper content, " "then answer based on the retrieved results. " "If the knowledge base does not contain relevant information, " "honestly state so." ), model=model, formatter=DashScopeChatFormatter(), memory=InMemoryMemory(), toolkit=rag_toolkit, ) await rag_agent( Msg( "User", "How does Multi-Head Attention work in Transformer?", "user", ), ) await rag_agent( Msg( "User", "What method does the paper use for Positional Encoding?", "user", ), )asyncio.run(demo_agentic_rag())
from agentscope.token import CharTokenCounterasync def demo_full_assistant() -> None: """Complete research assistant: tools + memory compression + RAG.""" # Toolkit full_toolkit = Toolkit() full_toolkit.register_tool_function(search_papers) full_toolkit.register_tool_function(execute_python_code) full_toolkit.register_tool_function(take_notes) full_toolkit.register_tool_function( knowledge.retrieve_knowledge, func_description="Retrieve information from the Transformer paper " "knowledge base", ) # Complete Agent full_assistant = ReActAgent( name="ResearchAssistant", sys_prompt=( "You are a professional research assistant serving a computer " "engineer.\n\n" "## Your Capabilities:\n" "1. Paper Search: Use search_papers to search for academic " "papers\n" "2. Knowledge Retrieval: Use retrieve_knowledge to retrieve " "information from the Transformer paper knowledge base\n" "3. Code Execution: Use execute_python_code to run Python " "code\n" "4. Note Taking: Use take_notes to save research notes\n\n" "## Working Principles:\n" "- When answering technical questions, prioritize retrieving " "paper content from the knowledge base as evidence\n" "- Use the code tool for calculations to ensure accuracy\n" "- Answers should be accurate, well-organized, and include " "proper citations where appropriate" ), model=model, formatter=DashScopeChatFormatter(), memory=InMemoryMemory(), toolkit=full_toolkit, compression_config=ReActAgent.CompressionConfig( enable=True, agent_token_counter=CharTokenCounter(), trigger_threshold=5000, keep_recent=5, ), ) conversations = [ "Hello! I'm a computer engineer interested in AI. " "What can you help me with?", "Help me search the knowledge base for the computational " "complexity of Self-Attention in Transformer", "Calculate: sequence length n=512, model dimension d=768, " "what is n*n*d?", "Help me search for recent papers on improving Transformer " "efficiency", "Record today's key point: Self-Attention complexity O(n^2*d) " "is the bottleneck for long sequences", "Summarize what we discussed today", ] for user_input in conversations: print(f"\n{'=' * 60}") print(f"User: {user_input}") response = await full_assistant( Msg("User", user_input, "user"), ) print(f"Assistant: {response.get_text_content()}")asyncio.run(demo_full_assistant())