从零开始:用C++和Weaviate构建数据分析应用

云信安装大师
90
AI 质量分
10 5 月, 2025
4 分钟阅读
0 阅读

从零开始:用C++和Weaviate构建数据分析应用

引言

在当今数据驱动的时代,如何高效地存储、检索和分析数据是开发者面临的重要挑战。本文将带你从零开始,使用C++和Weaviate(一个开源的向量搜索引擎)构建一个简单的数据分析应用。我们将涵盖从环境搭建到实际应用的完整流程,适合有一定C++基础但想学习现代数据存储技术的开发者。

准备工作

环境要求

  • C++17或更高版本
  • CMake 3.12+(CURL::libcurl 导入目标自 3.12 起才可用)
  • Weaviate服务(可通过Docker运行)
  • libcurl开发库(用于HTTP请求)
  • nlohmann/json(用于JSON的构造与解析)

安装依赖(Ubuntu示例)

代码片段
# Install the build toolchain and the libraries the client depends on.
# nlohmann-json3-dev provides <nlohmann/json.hpp>, which weaviate_client.h includes.
sudo apt-get update
sudo apt-get install -y build-essential cmake libcurl4-openssl-dev nlohmann-json3-dev

# Start a Weaviate instance (requires Docker); the REST API is published on port 8080.
docker run -d -p 8080:8080 semitechnologies/weaviate:latest

项目搭建

1. 创建项目结构

代码片段
weaviate-cpp-demo/
├── CMakeLists.txt
├── include/
│   └── weaviate_client.h
└── src/
    ├── main.cpp
    └── weaviate_client.cpp

2. CMake配置(CMakeLists.txt)

代码片段
# The CURL::libcurl imported target used below is only provided by FindCURL
# starting with CMake 3.12, so that is the real minimum version.
cmake_minimum_required(VERSION 3.12)
project(weaviate_cpp_demo)

# Fail at configure time instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(CURL REQUIRED)
# weaviate_client.h includes <nlohmann/json.hpp>, so the JSON library is a
# public dependency of the client library.
find_package(nlohmann_json REQUIRED)

add_library(weaviate_client 
    src/weaviate_client.cpp 
    include/weaviate_client.h)

target_include_directories(weaviate_client PUBLIC include)
target_link_libraries(weaviate_client
    PUBLIC
        CURL::libcurl
        nlohmann_json::nlohmann_json)

add_executable(demo_app src/main.cpp)
target_link_libraries(demo_app PRIVATE weaviate_client)

Weaviate客户端实现

weaviate_client.h

代码片段
#ifndef WEAVIATE_CLIENT_H
#define WEAVIATE_CLIENT_H

#include <string>
#include <vector>
#include <nlohmann/json.hpp>

/// Minimal client for the Weaviate REST API.
///
/// Every call is forwarded to performRequest(), which reports failures as a
/// JSON object containing an "error" key.
class WeaviateClient {
public:
    /// @param host Base URL of the Weaviate instance, without a trailing slash.
    ///
    /// Marked explicit so that a plain string is never silently converted
    /// into a client object.
    explicit WeaviateClient(const std::string& host = "http://localhost:8080");

    /// Creates a class (roughly the equivalent of a database table).
    /// @param schema Class definition sent as the body of POST /v1/schema.
    /// @return true when the response carries no "error" key.
    bool createClass(const nlohmann::json& schema);

    /// Inserts one data object.
    /// @param className Name of the class the object belongs to.
    /// @param object    Property values of the object.
    /// @return true when the response contains an "id" key.
    bool insertObject(const std::string& className, const nlohmann::json& object);

    /// Lists objects of a class (simple retrieval).
    /// @param className  Class to list.
    /// @param properties Value sent as the "properties" query parameter.
    /// @param limit      Value sent as the "limit" query parameter.
    /// @return The parsed JSON response, or an object with an "error" key.
    nlohmann::json queryObjects(const std::string& className, 
                               const std::string& properties = "",
                               int limit = 10);

private:
    std::string host_;  ///< Base URL that every endpoint is appended to.

    /// HTTP helper shared by all public calls.
    /// @param method   HTTP method name ("GET" or "POST" are used by this class).
    /// @param endpoint Path and query string appended to host_.
    /// @param body     JSON payload; only sent for "POST".
    /// @return The parsed JSON response, or an object with an "error" key.
    nlohmann::json performRequest(const std::string& method, 
                                 const std::string& endpoint,
                                 const nlohmann::json& body = {});
};

#endif // WEAVIATE_CLIENT_H

weaviate_client.cpp

代码片段
#include "weaviate_client.h"
#include <curl/curl.h>
#include <iostream>

using json = nlohmann::json;

// cURL write callback: appends each received chunk to the std::string that
// was registered through CURLOPT_WRITEDATA and reports how many bytes were
// consumed.
static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* userp) {
    const size_t chunkBytes = size * nmemb;
    auto* sink = static_cast<std::string*>(userp);
    const char* chunk = static_cast<const char*>(contents);
    sink->append(chunk, chunkBytes);
    return chunkBytes;
}

WeaviateClient::WeaviateClient(const std::string& host) : host_(host) {}

bool WeaviateClient::createClass(const json& schema) {
    auto response = performRequest("POST", "/v1/schema", schema);
    return !response.contains("error");
}

bool WeaviateClient::insertObject(const std::string& className, const json& object) {
    auto response = performRequest("POST", "/v1/objects", {
        {"class", className},
        {"properties", object}
    });
    return response.contains("id");
}

// Percent-encodes a query-string value: every byte outside the RFC 3986
// "unreserved" set (ALPHA / DIGIT / '-' / '.' / '_' / '~') becomes %XX.
static std::string percentEncode(const std::string& value) {
    static const char kHexDigits[] = "0123456789ABCDEF";

    std::string encoded;
    encoded.reserve(value.size());
    for (const unsigned char ch : value) {
        const bool unreserved =
            (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
            (ch >= '0' && ch <= '9') ||
            ch == '-' || ch == '.' || ch == '_' || ch == '~';
        if (unreserved) {
            encoded.push_back(static_cast<char>(ch));
        } else {
            encoded.push_back('%');
            encoded.push_back(kHexDigits[ch >> 4]);
            encoded.push_back(kHexDigits[ch & 0x0F]);
        }
    }
    return encoded;
}

// Issues GET /v1/objects with "class", "properties" and "limit" query
// parameters and returns whatever performRequest() yields.
//
// The class name and property list are percent-encoded first: unescaped,
// a space, '&' or '#' in either value would corrupt the request URL.
json WeaviateClient::queryObjects(const std::string& className, 
                                const std::string& properties,
                                int limit) {
    const std::string endpoint =
        "/v1/objects?class=" + percentEncode(className) +
        "&properties=" + percentEncode(properties) +
        "&limit=" + std::to_string(limit);
    return performRequest("GET", endpoint);
}

// Performs one HTTP request against host_ + endpoint and returns the parsed
// JSON response.
//
// method   - HTTP method; "POST" sends `body` as the JSON payload, anything
//            else is issued as libcurl's default request (GET).
// endpoint - path plus query string appended to host_.
// body     - JSON payload, serialized only for "POST".
//
// Returns a JSON object with an "error" key on any failure: cURL could not
// be initialized, the transfer failed, the HTTP status was >= 400, the
// response body was empty, or the body was not valid JSON.
json WeaviateClient::performRequest(const std::string& method, 
                                  const std::string& endpoint,
                                  const json& body) {
    CURL* curl = curl_easy_init();
    if (!curl) return {{"error", "Failed to initialize cURL"}};

    const std::string url = host_ + endpoint;
    std::string responseString;

    // CURLOPT_POSTFIELDS stores only the pointer, not a copy, so the
    // serialized body has to stay alive until curl_easy_perform() returns.
    // It is therefore declared at function scope rather than inside the
    // POST branch.
    std::string bodyStr;

    struct curl_slist* headers =
        curl_slist_append(nullptr, "Content-Type: application/json");

    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);

    if (method == "POST") {
        bodyStr = body.dump();
        curl_easy_setopt(curl, CURLOPT_POST, 1L);
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, bodyStr.c_str());
        // CURLOPT_POSTFIELDSIZE expects a long; passing size_t through the
        // variadic interface is not portable.
        curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
                         static_cast<long>(bodyStr.size()));
    }

    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &responseString);

    const CURLcode res = curl_easy_perform(curl);

    long http_code = 0;
    if (res == CURLE_OK) {
        curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
    }

    // Release libcurl resources before any return below so that the
    // transport-failure path no longer leaks the handle and header list.
    curl_slist_free_all(headers);
    curl_easy_cleanup(curl);

    if (res != CURLE_OK) {
        std::cerr << "cURL request failed: " << curl_easy_strerror(res) << "\n";
        return {{"error", curl_easy_strerror(res)}};
    }

    if (http_code >= 400 || responseString.empty()) {
        return {{"error", "HTTP error: " + std::to_string(http_code)}};
    }

    try {
        return json::parse(responseString);
    } catch (...) {
        // Any parse failure is reported through the same "error" convention
        // as the other failure paths instead of propagating an exception.
        return {{"error", "Failed to parse JSON response"}};
    }
}

主程序实现

main.cpp – 完整示例应用

代码片段

include “weaviate_client.h”

include

include

using json = nlohmann::json;

int main() {
try {
// 初始化客户端连接本地Weaviate实例
WeaviateClient client;

代码片段
     // Step 1: 创建一个产品类(相当于数据库表)
     json productSchema = {
         {"class", "Product"},
         {"properties", {
             {"name", {{"dataType", {"text"}}}},
             {"price", {{"dataType", {"number"}}}},
             {"description", {{"dataType", {"text"}}}},
             {"category", {{"dataType", {"text"}}}}
         }}
     };

     if (!client.createClass(productSchema)) {
         throw std::runtime_error("Failed to create Product class");
     }

     // Step 2: 插入一些示例产品数据
     json products[] = {{
         {"name", "Laptop Pro"},
         {"price", 1299.99},
         {"description", "High-performance laptop for professionals"},
         {"category", "Electronics"}
     }, {{
         {"name", "Wireless Headphones"},
         {"price", 199.99},
         {"description", "Noise-cancelling wireless headphones"},
         {"category", "Electronics"}
     }, {{
         ["name"] = "Desk Chair",
         ["price"] = 249.99,
         ["description"] = "Ergonomic office chair",
         ["category"] = "Furniture"
     }};

     for (const auto& product : products) {[
        ]if (!client.insertObject("Product", product)) {]
            throw [std:]runtime_error("Failed to insert product");
        }
        [std:]cout << "[Inserted] product: << product["name"].get<std:string>() << endl;]
      }

      // Step [3]: [查询所有电子产品]
      [std:]cout << "\nQuerying electronics products:" << endl;]

      auto results [= client.queryObjects("Product", "[price description category]", [2);]

      for ([const auto] item : results["objects"]) {]
          [std:]cout << "- Name: << item["properties"]["name"].get<std:string>() << endl;]
          [std:]cout << "[ Price]: $" << item["properties"]["price"].get<double>() << endl;]
          [std:]cout << "[ Category]: "<< item["properties"]["category"].get<std:string>() << endl;]
          [std:]cout << endl;]
      }
  } catch ([const] exception e) {]
      cerr >> "[Error]: "<< e.what() >> endl;]
      return [1];]
  }

  return [0];]
原创 高质量