代码
import asyncio
import numpy as np
import os
from openai import AsyncOpenAI
from functools import wraps
from nano_vectordb import NanoVectorDB
from dataclasses import dataclass, field
import logging
logger = logging.getLogger("lightrag")
from tqdm.asyncio import tqdm as tqdm_async
import time
import asyncio
import numpy as np
class UnlimitedSemaphore:
    """No-op async context manager: a stand-in for a semaphore that never blocks."""

    async def __aenter__(self):
        # Nothing to acquire, so entering completes immediately.
        return None

    async def __aexit__(self, exc_type, exc, tb):
        # Nothing to release; returning None lets any in-flight exception propagate.
        return None
@dataclass
class EmbeddingFunc:
    """Awaitable wrapper around an embedding callable with a concurrency guard.

    Calling an instance forwards all arguments to ``func`` while holding the
    guard created in ``__post_init__``. A ``concurrent_limit`` of 0 selects a
    guard that never blocks.
    """

    embedding_dim: int
    max_token_size: int
    func: callable
    concurrent_limit: int = 16

    def __post_init__(self):
        # 0 means "no throttling"; any other value sizes a real semaphore.
        throttled = self.concurrent_limit != 0
        self._semaphore = (
            asyncio.Semaphore(self.concurrent_limit)
            if throttled
            else UnlimitedSemaphore()
        )

    async def __call__(self, *args, **kwargs) -> np.ndarray:
        """Await ``func(*args, **kwargs)`` under the concurrency guard and return its result."""
        async with self._semaphore:
            embedded = await self.func(*args, **kwargs)
        return embedded
@dataclass
class StorageNameSpace:
    """Common base for storages: holds a namespace string and the global config dict."""

    namespace: str
    global_config: dict

    async def index_done_callback(self):
        """Hook for committing storage operations after indexing; does nothing here."""
        return None

    async def query_done_callback(self):
        """Hook for committing storage operations after querying; does nothing here."""
        return None
@dataclass
class BaseVectorStorage(StorageNameSpace):
    """Interface for vector storages; ``query`` and ``upsert`` must be overridden."""

    embedding_func: EmbeddingFunc
    meta_fields: set = field(default_factory=set)

    async def query(self, query: str, top_k: int) -> list[dict]:
        """Not implemented at this level; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    async def upsert(self, data: dict[str, dict]):
        """Insert or update entries; not implemented at this level.

        Intended contract for subclasses: each key of ``data`` is used as the
        entry id and the value's 'content' field is embedded. If
        ``embedding_func`` is None, the value's 'embedding' field is used instead.
        """
        raise NotImplementedError()
@dataclass
class NanoVectorDBStorage(BaseVectorStorage):
    """Vector storage that delegates to a ``NanoVectorDB`` client.

    ``global_config`` must provide ``working_dir`` and ``embedding_batch_num``;
    ``cosine_better_than_threshold`` is optional and overrides the field default.
    """

    cosine_better_than_threshold: float = 0.2

    def __post_init__(self):
        # Storage file lives in the configured working directory, one file per namespace.
        self._client_file_name = os.path.join(
            self.global_config["working_dir"], f"vdb_{self.namespace}.json"
        )
        # Maximum number of texts handed to the embedding function per call.
        self._max_batch_size = self.global_config["embedding_batch_num"]
        # Client sized to the embedding dimension and bound to the storage file.
        self._client = NanoVectorDB(
            self.embedding_func.embedding_dim, storage_file=self._client_file_name
        )
        # A threshold in the global config wins over the dataclass default.
        self.cosine_better_than_threshold = self.global_config.get(
            "cosine_better_than_threshold", self.cosine_better_than_threshold
        )

    async def upsert(self, data: dict[str, dict]):
        """Embed each value's 'content' and upsert the records into the client.

        Args:
            data: Mapping of record id to a dict that must contain 'content'.
                Keys listed in ``meta_fields`` are copied onto the stored record.

        Returns:
            ``[]`` when ``data`` is empty, the value returned by the client's
            ``upsert`` on success, or ``None`` when the number of embeddings
            does not match the number of records (the mismatch is logged).

        Raises:
            KeyError: If a value has no 'content' field.
        """
        logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
        if not data:
            logger.warning("You insert an empty data to vector DB")
            return []

        current_time = time.time()
        list_data = [
            {
                "__id__": k,
                "__created_at__": current_time,
                **{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields},
            }
            for k, v in data.items()
        ]
        contents = [v["content"] for v in data.values()]
        batches = [
            contents[i : i + self._max_batch_size]
            for i in range(0, len(contents), self._max_batch_size)
        ]

        # Create the bar before the closure that uses it, one tick per batch.
        pbar = tqdm_async(
            total=len(batches), desc="Generating embeddings", unit="batch"
        )

        async def wrapped_task(batch):
            result = await self.embedding_func(batch)
            pbar.update(1)
            return result

        try:
            embeddings_list = await asyncio.gather(
                *(wrapped_task(batch) for batch in batches)
            )
        finally:
            # Always release the progress bar, including when an embedding call fails.
            pbar.close()

        embeddings = np.concatenate(embeddings_list)
        if len(embeddings) != len(list_data):
            # Sometimes the embedding is not returned correctly; log it and store nothing.
            logger.error(
                f"embedding is not 1-1 with data, {len(embeddings)} != {len(list_data)}"
            )
            return None

        for record, vector in zip(list_data, embeddings):
            record["__vector__"] = vector
        return self._client.upsert(datas=list_data)

    async def index_done_callback(self):
        """Save the client's data to the storage file, creating its directory if needed."""
        parent_dir = os.path.dirname(self._client_file_name)
        if parent_dir:
            # The working directory may not exist yet when the first save happens.
            os.makedirs(parent_dir, exist_ok=True)
        self._client.save()
chunks = {'chunk-9e3921da66da5d761ab73cd849af6c43': {'tokens': 1200,
'content': '\ufeffThe Project Gutenberg eBook of A Christmas Carol\n \nThis ebook is for the use of anyone anywhere in the United States and\nmost other parts of the world at no cost and with almost no restrictions\nwhatsoever. You may copy it, give it away or re-use it under the terms\nof the Project Gutenberg License included with this ebook or online\nat www.gutenberg.org. If you are not located in the United States,\nyou will have to check the laws of the country where you are located\nbefore using this eBook.\n\nTitle: A Christmas Carol\n\nAuthor: Charles Dickens\n\nIllustrator: Arthur Rackham\n\nRelease date: December 24, 2007 [eBook #24022]\n\nLanguage: English\n\nOriginal publication: Philadelphia and New York: J. B. Lippincott Company,, 1915\n\nCredits: Produced by Suzanne Shell, Janet Blenkinship and the Online\n Distributed Proofreading Team at http://www.pgdp.net\n\n\n*** START OF THE PROJECT GUTENBERG EBOOK A CHRISTMAS CAROL ***\n\n\n\n\nProduced by Suzanne Shell, Janet Blenkinship and the Online\nDistributed Proofreading Team at http://www.pgdp.net\n\n\n\n\n\n\n\n\n\n\n\n A CHRISTMAS CAROL\n\n [Illustration: _"How now?" said Scrooge, caustic and cold as ever.\n "What do you want with me?"_]\n\n\n A CHRISTMAS CAROL\n\n [Illustration]\n\n BY\n\n CHARLES DICKENS\n\n [Illustration]\n\n ILLUSTRATED BY ARTHUR RACKHAM\n\n [Illustration]\n\n J. B. LIPPINCOTT COMPANY PHILADELPHIA AND NEW YORK\n\n FIRST PUBLISHED 1915\n\n REPRINTED 1923, 1927, 1932, 1933, 1934, 1935, 1947, 1948, 1952, 1958,\n 1962, 1964, 1966, 1967, 1969, 1971, 1972, 1973\n\n ISBN: 0-397-00033-2\n\n PRINTED IN GREAT BRITAIN\n\n\n\n\n PREFACE\n\n I have endeavoured in this Ghostly little book to raise the Ghost of an\n Idea which shall not put my readers out of humour with themselves, with\n each other, with the season, or with me. May it haunt their house\n pleasantly, and no one wish to lay it.\n\n Their faithful Friend and Servant,\n\n C. 
D.\n\n _December, 1843._\n\n\n\n\n CHARACTERS\n\n Bob Cratchit, clerk to Ebenezer Scrooge.\n Peter Cratchit, a son of the preceding.\n Tim Cratchit ("Tiny Tim"), a cripple, youngest son of Bob Cratchit.\n Mr. Fezziwig, a kind-hearted, jovial old merchant.\n Fred, Scrooge\'s nephew.\n Ghost of Christmas Past, a phantom showing things past.\n Ghost of Christmas Present, a spirit of a kind, generous,\n and hearty nature.\n Ghost of Christmas Yet to Come, an apparition showing the shadows\n of things which yet may happen.\n Ghost of Jacob Marley, a spectre of Scrooge\'s former partner in business.\n Joe, a marine-store dealer and receiver of stolen goods.\n Ebenezer Scrooge, a grasping, covetous old man, the surviving partner\n of the firm of Scrooge and Marley.\n Mr. Topper, a bachelor.\n Dick Wilkins, a fellow apprentice of Scrooge\'s.\n\n Belle, a comely matron, an old sweetheart of Scrooge\'s.\n Caroline, wife of one of Scrooge\'s debtors.\n Mrs. Cratchit, wife of Bob Cratchit.\n Belinda and Martha Cratchit, daughters of the preceding.\n\n Mrs. Dilber, a laundress.\n Fan, the sister of Scrooge.\n Mrs. Fezziwig, the worthy partner of Mr. Fezziwig.\n\n\n\n\n CONTENTS\n\n STAVE ONE--MARLEY\'S GHOST 3\n STAVE TWO--THE FIRST OF THE THREE SPIRITS 37\n STAVE THREE--THE SECOND OF THE THREE SPIRITS 69\n STAVE FOUR--THE LAST OF THE SPIRITS 111\n STAVE FIVE--THE END OF IT 137\n\n\n LIST OF ILLUSTRATIONS\n\n _IN COLOUR_\n\n\n "How now?" said Scrooge, caustic\n and cold as ever. "What do you\n want with me?" _Frontispiece_\n\n Bob Cratchit went down a slide on\n Cornhill, at the end of a lane of\n boys, twenty times, in honour of\n its being Christmas Eve 16\n\n Nobody under the bed; nobody in\n the closet; nobody in his dressing-gown,\n which was hanging up\n in a suspicious attitude against\n the wall 20\n\n The air was filled with phantoms,\n wandering hither and thither in\n restless haste and moaning as\n they went 32\n\n Then old Fezziwig stood out to\n dance with Mrs. 
Fezziwig 54\n\n A flushed and boisterous group 62\n\n Laden with Christmas toys and\n presents 64\n\n The way he went after that plump\n sister in the lace tucker! 100\n\n "How are you?" said one.\n "How are you?"',
'chunk_order_index': 0,
'full_doc_id': 1},
'chunk-89777b838d5447c7bd1ec11282c4ee89': {'tokens': 1200,
'content': 'restless haste and moaning as\n they went 32\n\n Then old Fezziwig stood out to\n dance with Mrs. Fezziwig 54\n\n A flushed and boisterous group 62\n\n Laden with Christmas toys and\n presents 64\n\n The way he went after that plump\n sister in the lace tucker! 100\n\n "How are you?" said one.\n "How are you?" returned the other.\n "Well!" said the first. "Old\n Scratch has got his own at last,\n hey?" 114\n\n "What do you call this?" said Joe.\n "Bed-curtains!" "Ah!" returned\n the woman, laughing....\n "Bed-curtains!"\n\n "You don\'t mean to say you took\n \'em down, rings and all, with him\n lying there?" said Joe.\n\n "Yes, I do," replied the woman.\n "Why not?" 120\n\n "It\'s I, your uncle Scrooge. I have\n come to dinner. Will you let\n me in, Fred?" 144\n\n "Now, I\'ll tell you what, my friend,"\n said Scrooge. "I am not going\n to stand this sort of thing any\n longer." 146\n\n[Illustration]\n\n_IN BLACK AND WHITE_\n\n\n Tailpiece vi\n Tailpiece to List of Coloured Illustrations x\n Tailpiece to List of Black and White Illustrations xi\n Heading to Stave One 3\n They were portly gentlemen, pleasant to behold 12\n On the wings of the wind 28-29\n Tailpiece to Stave One 34\n Heading to Stave Two 37\n He produced a decanter of curiously\n light wine and a block of curiously heavy cake 50\n She left him, and they parted 60\n Tailpiece to Stave Two 65\n Heading to Stave Three 69\n There was nothing very cheerful in the climate 75\n He had been Tim\'s blood-horse all the way from church 84-85\n With the pudding 88\n Heading to Stave Four 111\n Heading to Stave Five 137\n Tailpiece to Stave Five 147\n\n[Illustration]\n\n\nSTAVE ONE\n\n\n[Illustration]\n\n\n\n\nMARLEY\'S GHOST\n\n\nMarley was dead, to begin with. There is no doubt whatever about that.\nThe register of his burial was signed by the clergyman, the clerk, the\nundertaker, and the chief mourner. Scrooge signed it. 
And Scrooge\'s name\nwas good upon \'Change for anything he chose to put his hand to. Old\nMarley was as dead as a door-nail.\n\nMind! I don\'t mean to say that I know of my own knowledge, what there is\nparticularly dead about a door-nail. I might have been inclined, myself,\nto regard a coffin-nail as the deadest piece of ironmongery in the\ntrade. But the wisdom of our ancestors is in the simile; and my\nunhallowed hands shall not disturb it, or the country\'s done for. You\nwill, therefore, permit me to repeat, emphatically, that Marley was as\ndead as a door-nail.\n\nScrooge knew he was dead? Of course he did. How could it be otherwise?\nScrooge and he were partners for I don\'t know how many years. Scrooge\nwas his sole executor, his sole administrator, his sole assign, his sole\nresiduary legatee, his sole friend, and sole mourner. And even Scrooge\nwas not so dreadfully cut up by the sad event but that he was an\nexcellent man of business on the very day of the funeral, and solemnised\nit with an undoubted bargain.\n\nThe mention of Marley\'s funeral brings me back to the point I started\nfrom. There is no doubt that Marley was dead. This must be distinctly\nunderstood, or nothing wonderful can come of the story I am going to\nrelate. If we were not perfectly convinced that Hamlet\'s father died\nbefore the play began, there would be nothing more remarkable in his\ntaking a stroll at night, in an easterly wind, upon his own ramparts,\nthan there would be in any other middle-aged gentleman rashly turning\nout after dark in a breezy spot--say St. Paul\'s Churchyard, for\ninstance--literally to astonish his son\'s weak mind.\n\nScrooge never painted out Old Marley\'s name. There it stood, years\nafterwards, above the warehouse door: Scrooge and Marley. The firm was\nknown as Scrooge and Marley. Sometimes people new to the business called\nScrooge Scrooge, and sometimes Marley, but he answered to both names. It\nwas all the same to him.\n\nOh! 
but he was a tight-fisted hand at the grindstone, Scrooge! a\nsqueezing, wrenching, grasping, scraping, clutching, covetous old\nsinner! Hard and sharp as flint, from which no steel had ever struck out\ngenerous fire; secret, and self-contained, and solitary as an oyster.\nThe cold within him froze his old features, nipped his pointed nose,\nshrivelled his cheek, stiffened his gait; made his eyes red, his thin\nlips blue; and spoke out shrewdly in his grating voice. A frosty rime',
'chunk_order_index': 1,
'full_doc_id': 1},
'chunk-74e2466de2f67fd710ef2f20c0a8d9e0': {'tokens': 1200,
'content': "clutching, covetous old\nsinner! Hard and sharp as flint, from which no steel had ever struck out\ngenerous fire; secret, and self-contained, and solitary as an oyster.\nThe cold within him froze his old features, nipped his pointed nose,\nshrivelled his cheek, stiffened his gait; made his eyes red, his thin\nlips blue; and spoke out shrewdly in his grating voice. A frosty rime\nwas on his head, and on his eyebrows, and his wiry chin. He carried his\nown low temperature always about with him; he iced his office in the\ndog-days, and didn't thaw it one degree at Christmas.\n\nExternal heat and cold had little influence on Scrooge. No warmth could\nwarm, no wintry weather chill him. No wind that blew was bitterer than\nhe, no falling snow was more intent upon its purpose, no pelting rain\nless open to entreaty. Foul weather didn't know where to have him. The\nheaviest rain, and snow, and hail, and sleet could boast of the\nadvantage over him in only one respect. They often 'came down'\nhandsomely, and Scrooge never did.\n\nNobody ever stopped him in the street to say, with gladsome looks, 'My\ndear Scrooge, how are you? When will you come to see me?' No beggars\nimplored him to bestow a trifle, no children asked him what it was\no'clock, no man or woman ever once in all his life inquired the way to\nsuch and such a place, of Scrooge. Even the blind men's dogs appeared to\nknow him; and, when they saw him coming on, would tug their owners into\ndoorways and up courts; and then would wag their tails as though they\nsaid, 'No eye at all is better than an evil eye, dark master!'\n\nBut what did Scrooge care? It was the very thing he liked. To edge his\nway along the crowded paths of life, warning all human sympathy to keep\nits distance, was what the knowing ones call 'nuts' to Scrooge.\n\nOnce upon a time--of all the good days in the year, on Christmas\nEve--old Scrooge sat busy in his counting-house. 
It was cold, bleak,\nbiting weather; foggy withal; and he could hear the people in the court\noutside go wheezing up and down, beating their hands upon their breasts,\nand stamping their feet upon the pavement stones to warm them. The City\nclocks had only just gone three, but it was quite dark already--it had\nnot been light all day--and candles were flaring in the windows of the\nneighbouring offices, like ruddy smears upon the palpable brown air. The\nfog came pouring in at every chink and keyhole, and was so dense\nwithout, that, although the court was of the narrowest, the houses\nopposite were mere phantoms. To see the dingy cloud come drooping down,\nobscuring everything, one might have thought that nature lived hard by,\nand was brewing on a large scale.\n\nThe door of Scrooge's counting-house was open, that he might keep his\neye upon his clerk, who in a dismal little cell beyond, a sort of tank,\nwas copying letters. Scrooge had a very small fire, but the clerk's fire\nwas so very much smaller that it looked like one coal. But he couldn't\nreplenish it, for Scrooge kept the coal-box in his own room; and so\nsurely as the clerk came in with the shovel, the master predicted that\nit would be necessary for them to part. Wherefore the clerk put on his\nwhite comforter, and tried to warm himself at the candle; in which\neffort, not being a man of strong imagination, he failed.\n\n'A merry Christmas, uncle! God save you!' cried a cheerful voice. It was\nthe voice of Scrooge's nephew, who came upon him so quickly that this\nwas the first intimation he had of his approach.\n\n'Bah!' said Scrooge. 'Humbug!'\n\nHe had so heated himself with rapid walking in the fog and frost, this\nnephew of Scrooge's, that he was all in a glow; his face was ruddy and\nhandsome; his eyes sparkled, and his breath smoked again.\n\n'Christmas a humbug, uncle!' said Scrooge's nephew. 'You don't mean\nthat, I am sure?'\n\n'I do,' said Scrooge. 'Merry Christmas! 
What right have you to be merry?\nWhat reason have you to be merry? You're poor enough.'\n\n'Come, then,' returned the nephew gaily. 'What right have you to be\ndismal? What reason have you to be morose? You're rich enough.'\n\nScrooge, having no better answer ready on the spur of the moment, said,\n'Bah!' again; and followed it up with 'Humbug!'\n\n'Don't be cross, uncle!' said the nephew.\n\n'What else can I be,' returned the uncle, 'when I live in such a world\nof fools as this? Merry Christmas! Out upon merry Christmas! What's\nChristmas-time to you but a time for paying bills without money; a time\nfor finding yourself a year older, and not an hour richer; a time for\nbalancing your books, and having every item in 'em through a round dozen\nof months presented dead against you? If I could work my will,' said\nScrooge indignantly, 'every idiot who goes",
'chunk_order_index': 2,
'full_doc_id': 1},
'chunk-5dac41b3f9eeaf794f0147400b1718cd': {'tokens': 1200,
'content': 'when I live in such a world\nof fools as this? Merry Christmas! Out upon merry Christmas! What\'s\nChristmas-time to you but a time for paying bills without money; a time\nfor finding yourself a year older, and not an hour richer; a time for\nbalancing your books, and having every item in \'em through a round dozen\nof months presented dead against you? If I could work my will,\' said\nScrooge indignantly, \'every idiot who goes about with "Merry Christmas"\non his lips should be boiled with his own pudding, and buried with a\nstake of holly through his heart. He should!\'\n\n\'Uncle!\' pleaded the nephew.\n\n\'Nephew!\' returned the uncle sternly, \'keep Christmas in your own way,\nand let me keep it in mine.\'\n\n\'Keep it!\' repeated Scrooge\'s nephew. \'But you don\'t keep it.\'\n\n\'Let me leave it alone, then,\' said Scrooge. \'Much good may it do you!\nMuch good it has ever done you!\'\n\n\'There are many things from which I might have derived good, by which I\nhave not profited, I dare say,\' returned the nephew; \'Christmas among\nthe rest. But I am sure I have always thought of Christmas-time, when\nit has come round--apart from the veneration due to its sacred name and\norigin, if anything belonging to it can be apart from that--as a good\ntime; a kind, forgiving, charitable, pleasant time; the only time I know\nof, in the long calendar of the year, when men and women seem by one\nconsent to open their shut-up hearts freely, and to think of people\nbelow them as if they really were fellow-passengers to the grave, and\nnot another race of creatures bound on other journeys. And therefore,\nuncle, though it has never put a scrap of gold or silver in my pocket, I\nbelieve that it _has_ done me good and _will_ do me good; and I say, God\nbless it!\'\n\nThe clerk in the tank involuntarily applauded. 
Becoming immediately\nsensible of the impropriety, he poked the fire, and extinguished the\nlast frail spark for ever.\n\n\'Let me hear another sound from _you_,\' said Scrooge, \'and you\'ll keep\nyour Christmas by losing your situation! You\'re quite a powerful\nspeaker, sir,\' he added, turning to his nephew. \'I wonder you don\'t go\ninto Parliament.\'\n\n\'Don\'t be angry, uncle. Come! Dine with us to-morrow.\'\n\nScrooge said that he would see him----Yes, indeed he did. He went the\nwhole length of the expression, and said that he would see him in that\nextremity first.\n\n\'But why?\' cried Scrooge\'s nephew. \'Why?\'\n\n\'Why did you get married?\' said Scrooge.\n\n\'Because I fell in love.\'\n\n\'Because you fell in love!\' growled Scrooge, as if that were the only\none thing in the world more ridiculous than a merry Christmas. \'Good\nafternoon!\'\n\n\'Nay, uncle, but you never came to see me before that happened. Why give\nit as a reason for not coming now?\'\n\n\'Good afternoon,\' said Scrooge.\n\n\'I want nothing from you; I ask nothing of you; why cannot we be\nfriends?\'\n\n\'Good afternoon!\' said Scrooge.\n\n\'I am sorry, with all my heart, to find you so resolute. We have never\nhad any quarrel to which I have been a party. But I have made the trial\nin homage to Christmas, and I\'ll keep my Christmas humour to the last.\nSo A Merry Christmas, uncle!\'\n\n\'Good afternoon,\' said Scrooge.\n\n\'And A Happy New Year!\'\n\n\'Good afternoon!\' said Scrooge.\n\nHis nephew left the room without an angry word, notwithstanding. He\nstopped at the outer door to bestow the greetings of the season on the\nclerk, who, cold as he was, was warmer than Scrooge; for he returned\nthem cordially.\n\n\'There\'s another fellow,\' muttered Scrooge, who overheard him: \'my\nclerk, with fifteen shillings a week, and a wife and family, talking\nabout a merry Christmas. 
I\'ll retire to Bedlam.\'\n\nThis lunatic, in letting Scrooge\'s nephew out, had let two other people\nin. They were portly gentlemen, pleasant to behold, and now stood, with\ntheir hats off, in Scrooge\'s office. They had books and papers in their\nhands, and bowed to him.\n\n\'Scrooge and Marley\'s, I believe,\' said one of the gentlemen, referring\nto his list. \'Have I the pleasure of addressing Mr. Scrooge, or Mr.\nMarley?\'\n\n\'Mr. Marley has been dead these seven years,\' Scrooge replied. \'He died\nseven years ago, this very night.\'\n\n\'We have no doubt his liberality is well represented by his surviving\npartner,\' said the gentleman, presenting his credentials.\n\n[Illustration: THEY WERE PORTLY GENTLEMEN, PLEASANT TO BEHOLD]\n\nIt certainly was; for they had been two kindred spirits. At the ominous\nword \'liberality\' Scrooge frowned, and shook his head, and handed the\ncredentials back.\n\n\'At this festive season of the year, Mr. Scrooge,\' said the gentleman,\ntaking up a pen, \'it is more than usually desirable that we should make\nsome slight provision for the poor and destitute, who suffer greatly at\nthe present time. Many thousands are',
'chunk_order_index': 3,
'full_doc_id': 1},
'chunk-90d5764e301321c087f5a8f78b73a145': {'tokens': 1200,
'content': "It certainly was; for they had been two kindred spirits. At the ominous\nword 'liberality' Scrooge frowned, and shook his head, and handed the\ncredentials back.\n\n'At this festive season of the year, Mr. Scrooge,' said the gentleman,\ntaking up a pen, 'it is more than usually desirable that we should make\nsome slight provision for the poor and destitute, who suffer greatly at\nthe present time. Many thousands are in want of common necessaries;\nhundreds of thousands are in want of common comforts, sir.'\n\n'Are there no prisons?' asked Scrooge.\n\n'Plenty of prisons,' said the gentleman, laying down the pen again.\n\n'And the Union workhouses?' demanded Scrooge. 'Are they still in\noperation?'\n\n'They are. Still,' returned the gentleman, 'I wish I could say they were\nnot.'\n\n'The Treadmill and the Poor Law are in full vigour, then?' said Scrooge.\n\n'Both very busy, sir.'\n\n'Oh! I was afraid, from what you said at first, that something had\noccurred to stop them in their useful course,' said Scrooge. 'I am very\nglad to hear it.'\n\n'Under the impression that they scarcely furnish Christian cheer of mind\nor body to the multitude,' returned the gentleman, 'a few of us are\nendeavouring to raise a fund to buy the Poor some meat and drink, and\nmeans of warmth. We choose this time, because it is a time, of all\nothers, when Want is keenly felt, and Abundance rejoices. What shall I\nput you down for?'\n\n'Nothing!' Scrooge replied.\n\n'You wish to be anonymous?'\n\n'I wish to be left alone,' said Scrooge. 'Since you ask me what I wish,\ngentlemen, that is my answer. I don't make merry myself at Christmas,\nand I can't afford to make idle people merry. I help to support the\nestablishments I have mentioned--they cost enough: and those who are\nbadly off must go there.'\n\n'Many can't go there; and many would rather die.'\n\n'If they would rather die,' said Scrooge, 'they had better do it, and\ndecrease the surplus population. 
Besides--excuse me--I don't know that.'\n\n'But you might know it,' observed the gentleman.\n\n'It's not my business,' Scrooge returned. 'It's enough for a man to\nunderstand his own business, and not to interfere with other people's.\nMine occupies me constantly. Good afternoon, gentlemen!'\n\nSeeing clearly that it would be useless to pursue their point, the\ngentlemen withdrew. Scrooge resumed his labours with an improved opinion\nof himself, and in a more facetious temper than was usual with him.\n\nMeanwhile the fog and darkness thickened so, that people ran about with\nflaring links, proffering their services to go before horses in\ncarriages, and conduct them on their way. The ancient tower of a church,\nwhose gruff old bell was always peeping slyly down at Scrooge out of a\nGothic window in the wall, became invisible, and struck the hours and\nquarters in the clouds, with tremulous vibrations afterwards, as if its\nteeth were chattering in its frozen head up there. The cold became\nintense. In the main street, at the corner of the court, some labourers\nwere repairing the gas-pipes, and had lighted a great fire in a brazier,\nround which a party of ragged men and boys were gathered: warming their\nhands and winking their eyes before the blaze in rapture. The water-plug\nbeing left in solitude, its overflowings suddenly congealed, and turned\nto misanthropic ice. The brightness of the shops, where holly sprigs and\nberries crackled in the lamp heat of the windows, made pale faces ruddy\nas they passed. Poulterers' and grocers' trades became a splendid joke:\na glorious pageant, with which it was next to impossible to believe that\nsuch dull principles as bargain and sale had anything to do. 
The Lord\nMayor, in the stronghold of the mighty Mansion House, gave orders to his\nfifty cooks and butlers to keep Christmas as a Lord Mayor's household\nshould; and even the little tailor, whom he had fined five shillings on\nthe previous Monday for being drunk and bloodthirsty in the streets,\nstirred up to-morrow's pudding in his garret, while his lean wife and\nthe baby sallied out to buy the beef.\n\nFoggier yet, and colder! Piercing, searching, biting cold. If the good\nSt. Dunstan had but nipped the Evil Spirit's nose with a touch of such\nweather as that, instead of using his familiar weapons, then indeed he\nwould have roared to lusty purpose. The owner of one scant young nose,\ngnawed and mumbled by the hungry cold as bones are gnawed by dogs,\nstooped down at Scrooge's keyhole to regale him with a Christmas carol;\nbut, at the first sound of\n\n 'God bless you, merry gentleman,\n May nothing you dismay!'\n\nScrooge seized the ruler with such energy of action that the singer fled\nin terror, leaving the keyhole to the fog, and even more congenial\nfrost.\n\nAt length the hour of shutting up the counting-house arrived. With an\nill-will Scrooge dismounted from his stool, and tacitly admitted the\nfact to the expectant clerk in the tank, who instantly snuffed his",
'chunk_order_index': 4,
'full_doc_id': 1}}
import asyncio
import numpy as np
import os
from openai import AsyncOpenAI
from functools import wraps
# Credential passed to the embedding endpoint. Read it from the GLM_API_KEY
# environment variable so a real key never has to be written into this file;
# the literal is only a placeholder default.
GLM_API_KEY = os.environ.get("GLM_API_KEY", "api key")
async def openai_embedding(
    texts: list[str],
    model: str = "text-embedding-3-small",
    base_url: str = None,
    api_key: str = None,
) -> np.ndarray:
    """Request embeddings for ``texts`` from an OpenAI-compatible endpoint.

    Args:
        texts: Strings to embed, sent in a single request.
        model: Model name passed to the embeddings API.
        base_url: Endpoint base URL; when None the client's default is used.
        api_key: When truthy, exported as ``OPENAI_API_KEY`` before the client
            is created.

    Returns:
        Array built from the ``embedding`` of each item in the response data.
    """
    if api_key:
        # NOTE: this mutates the process environment (kept for backward
        # compatibility); the client reads the key from OPENAI_API_KEY.
        os.environ["OPENAI_API_KEY"] = api_key
    client = AsyncOpenAI() if base_url is None else AsyncOpenAI(base_url=base_url)
    # A client is created per call, so close it when done instead of leaking
    # its underlying HTTP connection pool.
    async with client:
        response = await client.embeddings.create(
            model=model, input=texts, encoding_format="float"
        )
    return np.array([dp.embedding for dp in response.data])
async def embedding_func(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` through ``openai_embedding`` with the "embedding-3" model.

    The request goes to the bigmodel.cn endpoint and uses ``GLM_API_KEY``.
    """
    request_options = {
        "model": "embedding-3",
        "api_key": GLM_API_KEY,
        "base_url": "https://open.bigmodel.cn/api/paas/v4/",
    }
    vectors = await openai_embedding(texts, **request_options)
    return vectors
async def get_embedding_dim():
    """Embed one probe sentence and return the size of the result's second axis."""
    probe_vectors = await embedding_func(["This is a test sentence."])
    return probe_vectors.shape[1]
# Notebook cell: top-level ``await`` only works in an async-aware REPL such as
# IPython/Jupyter; in a plain script this call must run inside a coroutine.
embedding_dimension = await get_embedding_dim()
# Bare expression so the notebook displays the probed dimension.
embedding_dimension
INFO:httpx:HTTP Request: POST https://open.bigmodel.cn/api/paas/v4/embeddings "HTTP/1.1 200 OK"
2048
# Build the vector store for the "chunks" namespace: it uses the embedding
# dimension probed above, embeds at most 5 texts per call, and keeps its
# storage file under ./tmp.
chunks_vdb = NanoVectorDBStorage(
namespace="chunks",
embedding_func= EmbeddingFunc(
embedding_dim=embedding_dimension,
max_token_size=8192,
func=embedding_func,
),
global_config={"working_dir": "./tmp", "embedding_batch_num": 5}
)
# Notebook cell: top-level await; the returned upsert result is displayed.
await chunks_vdb.upsert(chunks)
INFO:nano-vectordb:Init {'embedding_dim': 2048, 'metric': 'cosine', 'storage_file': './tmp\\vdb_chunks.json'} 0 data
INFO:lightrag:Inserting 5 vectors to chunks
Generating embeddings: 0%| | 0/1 [00:00<?, ?batch/s]INFO:httpx:HTTP Request: POST https://open.bigmodel.cn/api/paas/v4/embeddings "HTTP/1.1 200 OK"
Generating embeddings: 100%|██████████| 1/1 [00:02<00:00, 2.46s/batch]
{'update': [],
'insert': ['chunk-9e3921da66da5d761ab73cd849af6c43',
'chunk-89777b838d5447c7bd1ec11282c4ee89',
'chunk-74e2466de2f67fd710ef2f20c0a8d9e0',
'chunk-5dac41b3f9eeaf794f0147400b1718cd',
'chunk-90d5764e301321c087f5a8f78b73a145']}
WORKING_DIR = "./tmp"
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
await chunks_vdb.index_done_callback()
代码解释
这段代码的主要逻辑是:将文本数据转换为向量,并存储到 NanoVectorDB 向量数据库,以支持后续的向量检索。具体步骤如下:
1. 代码整体流程
(1)初始化 NanoVectorDB 存储
- 定义 `NanoVectorDBStorage` 作为向量存储的具体实现
  - 继承 `BaseVectorStorage`
  - 采用 NanoVectorDB 作为底层存储
  - 设定向量的存储文件路径(`self._client_file_name`)
  - 设定最大批处理大小(`self._max_batch_size`)
  - 设定余弦相似度阈值(`cosine_better_than_threshold`)
- 定义 `EmbeddingFunc` 处理文本到向量的转换
  - 设定 `embedding_dim`(嵌入维度)
  - 设定 `max_token_size`(最大 token 限制)
  - 设定 `func`(实际的嵌入计算函数)
  - 采用 `asyncio.Semaphore` 进行并发控制(防止 API 请求过载)
(2)加载并嵌入文本数据
- 初始化 NanoVectorDB 存储实例
  - 设定 `namespace="chunks"`(命名空间)
  - 设定 `embedding_dim=2048`
  - 设定 `embedding_batch_num=5`(分批嵌入)
- 向数据库中插入数据:`upsert(data: dict[str, dict])`
  - 提取 `content` 进行嵌入计算
  - 批量提交到 NanoVectorDB 进行存储
  - 记录 `__id__` 和 `__vector__`,并关联元数据(`meta_fields`)
(3)文本向量化的 API 调用
- `openai_embedding()`
  - 通过 `AsyncOpenAI()` 调用 API 计算文本向量
  - 默认采用 `text-embedding-3-small` 模型(本例中由 `embedding_func()` 指定为 `embedding-3`)
  - 解析 API 返回的向量数据
- `embedding_func()`
  - 采用 `openai_embedding()` 进行嵌入
  - 指定 `base_url="https://open.bigmodel.cn/api/paas/v4/"`
- `get_embedding_dim()`
  - 计算 `embedding_dim`
  - 通过 `await embedding_func(["This is a test sentence."])` 获取向量维度(2048)
(4)向量数据库操作
- 插入文本块:`await chunks_vdb.upsert(chunks)`
  - `chunks` 是待处理的文本块,每个 `chunk` 具有 `content`
  - 先计算嵌入,再存入 NanoVectorDB
- 保存索引:`await chunks_vdb.index_done_callback()`
  - 触发 `self._client.save()`,持久化数据到 `vdb_chunks.json`
2. 代码运行逻辑总结
| 步骤 | 代码 | 作用 |
|---|---|---|
| ① | `NanoVectorDBStorage` | 负责管理 NanoVectorDB 存储 |
| ② | `EmbeddingFunc` | 控制嵌入计算,并发处理 |
| ③ | `openai_embedding` | 调用大模型 API 生成向量 |
| ④ | `upsert(chunks)` | 计算文本向量,存入 NanoVectorDB |
| ⑤ | `index_done_callback()` | 持久化 NanoVectorDB 数据 |
3. 运行结果
最终,文本 `chunks` 被转换为 2048 维的向量,并存储在 NanoVectorDB 中,可用于向量检索(语义搜索)。