diff --git a/src/.vuepress/sidebar/V2.0.x/en-Table.ts b/src/.vuepress/sidebar/V2.0.x/en-Table.ts index cfa8b9a74..d47f91d65 100644 --- a/src/.vuepress/sidebar/V2.0.x/en-Table.ts +++ b/src/.vuepress/sidebar/V2.0.x/en-Table.ts @@ -38,8 +38,7 @@ export const enSidebar = { collapsible: true, prefix: 'Background-knowledge/', children: [ - { text: 'Common Concepts', link: 'Cluster-Concept_apache' }, - { text: 'Timeseries Data Model', link: 'Navigating_Time_Series_Data_apache' }, + { text: 'Basic Concepts', link: 'Common-Concepts_apache' }, { text: 'Modeling Scheme Design', link: 'Data-Model-and-Terminology_apache' }, { text: 'Data Type', link: 'Data-Type_apache' }, ], diff --git a/src/.vuepress/sidebar/V2.0.x/en-Tree.ts b/src/.vuepress/sidebar/V2.0.x/en-Tree.ts index 8d692b69c..94661b437 100644 --- a/src/.vuepress/sidebar/V2.0.x/en-Tree.ts +++ b/src/.vuepress/sidebar/V2.0.x/en-Tree.ts @@ -38,11 +38,7 @@ export const enSidebar = { collapsible: true, prefix: 'Background-knowledge/', children: [ - { text: 'Common Concepts', link: 'Cluster-Concept_apache' }, - { - text: 'Timeseries Data Model', - link: 'Navigating_Time_Series_Data_apache', - }, + { text: 'Basic Concepts', link: 'Common-Concepts_apache' }, { text: 'Modeling Scheme Design', link: 'Data-Model-and-Terminology_apache', diff --git a/src/.vuepress/sidebar/V2.0.x/zh-Table.ts b/src/.vuepress/sidebar/V2.0.x/zh-Table.ts index 0fc97dd7b..39b9a51b5 100644 --- a/src/.vuepress/sidebar/V2.0.x/zh-Table.ts +++ b/src/.vuepress/sidebar/V2.0.x/zh-Table.ts @@ -38,8 +38,7 @@ export const zhSidebar = { collapsible: true, prefix: 'Background-knowledge/', children: [ - { text: '常见概念', link: 'Cluster-Concept_apache' }, - { text: '时序数据模型', link: 'Navigating_Time_Series_Data_apache' }, + { text: '基础概念', link: 'Common-Concepts_apache' }, { text: '建模方案设计', link: 'Data-Model-and-Terminology_apache' }, { text: '数据类型', link: 'Data-Type_apache' }, ], diff --git a/src/.vuepress/sidebar/V2.0.x/zh-Tree.ts 
b/src/.vuepress/sidebar/V2.0.x/zh-Tree.ts index 3d39fadf4..f98b2e71a 100644 --- a/src/.vuepress/sidebar/V2.0.x/zh-Tree.ts +++ b/src/.vuepress/sidebar/V2.0.x/zh-Tree.ts @@ -38,8 +38,7 @@ export const zhSidebar = { collapsible: true, prefix: 'Background-knowledge/', children: [ - { text: '常见概念', link: 'Cluster-Concept_apache' }, - { text: '时序数据模型', link: 'Navigating_Time_Series_Data_apache' }, + { text: '基础概念', link: 'Common-Concepts_apache' }, { text: '建模方案设计', link: 'Data-Model-and-Terminology_apache' }, { text: '数据类型', link: 'Data-Type' }, ], diff --git a/src/UserGuide/Master/Table/Background-knowledge/Common-Concepts_apache.md b/src/UserGuide/Master/Table/Background-knowledge/Common-Concepts_apache.md new file mode 100644 index 000000000..3df6d80d1 --- /dev/null +++ b/src/UserGuide/Master/Table/Background-knowledge/Common-Concepts_apache.md @@ -0,0 +1,182 @@ +# Basic Concepts + +## 1. General Time Series Database Concepts + +This section introduces basic concepts commonly used in time series databases, including time series data, time series, devices, timeseries or fields, data points, collection frequency, TTL, schema, encoding, and compression. + +### 1.1 Time Series Data + +In scenarios such as IoT, industrial production, energy and power, connected vehicles, and infrastructure monitoring, devices usually use sensors to continuously collect status data about themselves or their environment. For example, motors collect voltage and current, wind turbines collect blade speed, angular velocity, and power generation, vehicles collect longitude, latitude, speed, and fuel consumption, and bridges collect vibration frequency, deflection, and displacement. + +![](/img/time-series-data-en-01.png) + +The common feature of this type of data is that it is related to time: the same collection object continuously generates new records as time passes. Data that is continuously generated and recorded in chronological order is called time series data. 
+ +### 1.2 Time Series + +In time series data scenarios, a collection point continuously generates data points over time. When these data points are arranged in ascending timestamp order, they form a time series. In table form, a time series can be represented as a data table made up of time and value. In graph form, a time series can be represented as a trend curve that changes over time, and can also be described figuratively as the "electrocardiogram" of a device. + +![](/img/time-series-data-en-02.png) + +### 1.3 Device + +A device, also called an entity or equipment, is a device or apparatus with physical quantities in a real-world scenario. It can be a physical device, a measurement apparatus, or a collection of sensors. + +Common examples are as follows: + +| Scenario | Device Example | Identifier Example | +| --- | --- | --- | +| Energy | Wind turbine | Region, station, line, model, instance, etc. | +| Factory | Robotic arm | Unique ID generated by an IoT platform | +| Connected vehicle | Vehicle | Vehicle identification number (VIN) | +| Monitoring | CPU | Equipment room, rack, hostname, device type, etc. | + +### 1.4 Timeseries / Field + +A timeseries or field can also be called a physical quantity, time series, timeline, signal, metric, point, or measured value. It is the measurement information recorded by a detection device in a real-world scenario. Usually, one physical quantity represents one collection point that can periodically collect a physical quantity from its environment or device. When the data points generated by a timeseries or field are arranged in ascending timestamp order, they form a time series. 
+ +Common examples are as follows: + +| Scenario | Timeseries / Field Example | +| --- | --- | +| Energy and power | Current, voltage, wind speed, rotational speed | +| Connected vehicle | Fuel level, vehicle speed, longitude, latitude | +| Factory | Temperature, humidity | + +### 1.5 Data Point + +A data point consists of a timestamp and a value. The timestamp indicates when the data was generated, and the value indicates the collection result of the timeseries or field at that time. The value can be of various types, such as BOOLEAN, FLOAT, and INT32. + +A row in a tabular time series, or a point in a trend chart, can be understood as a data point. + +![](/img/time-series-data-en-03.png) + +### 1.6 Collection Frequency + +Collection frequency refers to the number of times a physical quantity generates data within a certain period. For example, if a temperature sensor collects temperature data once per second, its collection frequency is 1 Hz, that is, once per second. + +The higher the collection frequency, the more data points are generated per unit of time, and the higher the requirements for write, storage, and query capabilities. + +### 1.7 Data Retention Time (TTL) + +TTL specifies the retention time of data. Data beyond the TTL will be automatically deleted. + +Using TTL properly can control disk space usage, avoid exceptions such as disks becoming full, and help maintain query performance and reduce memory usage. + +### 1.8 Schema + +Schema is the data model information of a database and is used to describe the structure and definition of data. For time series data, schema usually includes devices, timeseries or fields, data types, and other information. + +### 1.9 Encoding and Compression + +Encoding is a compression technique used to represent data in binary form and improve storage efficiency. Compression further compresses the encoded binary data to improve storage efficiency. 
+ +> For details about encoding and compression supported by IoTDB, see [Compression and Encoding](../Technical-Insider/Encoding-and-Compression.md). + +## 2. Common IoTDB Concepts + +This section introduces common concepts in IoTDB data models, distributed architecture, and deployment. These concepts explain how IoTDB organizes, manages, and deploys time series data. + +### 2.1 Data Model Concepts + +#### 2.1.1 Data Model (sql_dialect) + +IoTDB supports two data models: tree model and table model. The core objects managed by both models are devices and timeseries, but their organization methods and syntax are different. + +- Tree model: Manages data through hierarchical paths, where one path corresponds to one timeseries of one device. + +- Table model: Manages data through relational tables. It is recommended that one table correspond to one type of device. + +Both model spaces can exist in the same cluster instance. Different models use different syntax and database naming methods, and are not visible to each other by default. + +#### 2.1.2 Database + +In the table model, a database is the upper-level organizational structure and can manage multiple types of devices and their tables. Before creating tables, writing data, or querying data, you usually need to create a database first. + +#### 2.1.3 Table + +In the table model, it is recommended that one table correspond to one type of device and be used to organize the time series data of that type of device. Devices of the same type usually have the same or similar sets of fields. + +#### 2.1.4 Time Column, Tag Column, Attribute Column, and Field Column + +Columns in the table model can be divided by purpose into time columns, tag columns, attribute columns, and field columns. + +| Concept | Description | +| --- | --- | +| Time column (TIME) | Each table must contain one time column whose data type is TIMESTAMP | +| Tag column (TAG) | Used to identify devices. 
It can serve as the composite primary key of devices and usually does not change over time | +| Attribute column (ATTRIBUTE) | Used to describe static attributes of devices. Attribute values are not time-varying, but they can be updated or added | +| Field column (FIELD) | Used to store field values collected by devices. Values change over time | + +In terms of data filtering efficiency, the usual order can be understood as: time columns and tag columns first, then attribute columns, and finally field columns. + +### 2.2 Distributed Concepts + +IoTDB supports cluster deployment. Common concepts in a cluster include nodes, Regions, and multiple replicas. A common cluster deployment mode is 3C3D, that is, 3 ConfigNodes and 3 DataNodes. + +![](/img/Cluster-Concept03N.png) + +#### 2.2.1 Node + +An IoTDB cluster includes three types of nodes: ConfigNode, DataNode, and AINode. + +- ConfigNode: Manages node information, configuration information, user permissions, schema, partition information, and other cluster information. It is responsible for scheduling distributed operations and load balancing. All ConfigNodes are full backups of each other. + +- DataNode: Serves client requests and is responsible for data storage and computation. + +- AINode: Provides machine learning capabilities. It supports registering trained machine learning models and invoking models for inference through SQL. + +#### 2.2.2 Data Partition (Region) + +In IoTDB, both schema and data are divided into smaller partitions, namely Regions, and are managed by DataNodes in the cluster. + +- SchemaRegion: A schema partition used to manage the schema of some devices and timeseries or fields. + +- DataRegion: A data partition used to manage the data of some devices within a period of time. + +Regions with the same RegionID on different DataNodes are replicas of each other. + +#### 2.2.3 Multiple Replicas + +The number of replicas for data and schema is configurable. 
Multiple replicas can provide high-availability services. + +| Category | Configuration Item | Recommended Standalone Configuration | Recommended Cluster Configuration | +| --- | --- | --- | --- | +| Schema | schema_replication_factor | 1 | 3 | +| Data | data_replication_factor | 1 | 2 | + +### 2.3 Deployment Concepts + +IoTDB has two running modes: standalone mode and cluster mode. + +#### 2.3.1 Standalone Mode + +An IoTDB standalone instance includes 1 ConfigNode and 1 DataNode, that is, 1C1D. + +- Features: Easy for developers to install and deploy, with low deployment and maintenance costs and convenient operations. + +- Applicable scenarios: Scenarios with limited resources or low high-availability requirements, such as edge servers. + +- Deployment method: [Standalone deployment](../Deployment-and-Maintenance/Stand-Alone-Deployment_apache.md). + +#### 2.3.2 Cluster Mode + +An IoTDB cluster instance consists of 3 ConfigNodes and at least 3 DataNodes; the typical configuration is 3 ConfigNodes and 3 DataNodes, that is, 3C3D. When some nodes fail, the remaining nodes can still provide services externally, ensuring high availability of database services. Database performance can also be improved by adding nodes. + +- Features: High availability and high scalability. System performance can be improved by adding DataNodes. + +- Applicable scenarios: Enterprise application scenarios that require high availability and reliability. + +- Deployment method: [Cluster deployment](../Deployment-and-Maintenance/Cluster-Deployment_apache.md). + +#### 2.3.3 Feature Summary + +| Dimension | Standalone Mode | Cluster Mode | +| --- | --- | --- | +| Applicable scenarios | Edge deployment; low high-availability requirements | High-availability services; disaster recovery scenarios, etc. 
| +| Required number of machines | 1 | >= 3 | +| Safety and reliability | Cannot tolerate a single point of failure | High; can tolerate a single point of failure | +| Scalability | Can scale DataNodes to improve performance | Can scale DataNodes to improve performance | +| Performance | Can scale with the number of DataNodes | Can scale with the number of DataNodes | + +Standalone mode and cluster mode have similar deployment steps: ConfigNodes and DataNodes are added one by one. The differences are only in the number of replicas and the minimum number of nodes that can provide services. diff --git a/src/UserGuide/Master/Table/Background-knowledge/Data-Model-and-Terminology_apache.md b/src/UserGuide/Master/Table/Background-knowledge/Data-Model-and-Terminology_apache.md index ddf1626c0..a9c27cc9e 100644 --- a/src/UserGuide/Master/Table/Background-knowledge/Data-Model-and-Terminology_apache.md +++ b/src/UserGuide/Master/Table/Background-knowledge/Data-Model-and-Terminology_apache.md @@ -25,7 +25,7 @@ This section introduces how to transform time series data application scenarios ## 1. Time Series Data Mode -Before designing an IoTDB data mode, it's essential to understand time series data and its underlying structure. For more details, refer to: [Time Series Data Mode](../Background-knowledge/Navigating_Time_Series_Data_apache.md) +Before designing an IoTDB data mode, it's essential to understand time series data and its underlying structure. For more details, refer to: [Basic Concepts](../Background-knowledge/Common-Concepts_apache.md) ## 2. Tree-Table Twin Mode in IoTDB diff --git a/src/UserGuide/Master/Table/QuickStart/QuickStart_apache.md b/src/UserGuide/Master/Table/QuickStart/QuickStart_apache.md index 84ad616f5..d3cdf9a83 100644 --- a/src/UserGuide/Master/Table/QuickStart/QuickStart_apache.md +++ b/src/UserGuide/Master/Table/QuickStart/QuickStart_apache.md @@ -45,7 +45,7 @@ This guide will assist you in quickly installing and deploying IoTDB. You can qu 1. 
Database Modeling Design: Database modeling is a crucial step in creating a database system, involving the design of data structures and relationships to ensure that the organization of data meets the needs of specific applications. The following documents will help you quickly understand IoTDB's modeling design: - - Introduction to Time Series Concepts: [Navigating Time Series Data](../Background-knowledge/Navigating_Time_Series_Data_apache.md) + - Introduction to Time Series Concepts: [Basic Concepts](../Background-knowledge/Common-Concepts_apache.md) - Introduction to Modeling Design:[Data Model and Terminology](../Background-knowledge/Data-Model-and-Terminology_apache.md) diff --git a/src/UserGuide/Master/Tree/Background-knowledge/Common-Concepts_apache.md b/src/UserGuide/Master/Tree/Background-knowledge/Common-Concepts_apache.md new file mode 100644 index 000000000..2d28548ee --- /dev/null +++ b/src/UserGuide/Master/Tree/Background-knowledge/Common-Concepts_apache.md @@ -0,0 +1,187 @@ +# Basic Concepts + +## 1. General Time Series Database Concepts + +This section introduces basic concepts commonly used in time series databases, including time series data, time series, devices, timeseries, data points, collection frequency, TTL, schema, encoding, and compression. + +### 1.1 Time Series Data + +In scenarios such as IoT, industrial production, energy and power, connected vehicles, and infrastructure monitoring, devices usually use sensors to continuously collect status data about themselves or their environment. For example, motors collect voltage and current, wind turbines collect blade speed, angular velocity, and power generation, vehicles collect longitude, latitude, speed, and fuel consumption, and bridges collect vibration frequency, deflection, and displacement. + +![](/img/time-series-data-en-01.png) + +The common feature of this type of data is that it is related to time: the same collection object continuously generates new records as time passes. 
Data that is continuously generated and recorded in chronological order is called time series data. + +### 1.2 Time Series + +In time series data scenarios, a collection point continuously generates data points over time. When these data points are arranged in ascending timestamp order, they form a time series. In table form, a time series can be represented as a data table made up of time and value. In graph form, a time series can be represented as a trend curve that changes over time, and can also be described figuratively as the "electrocardiogram" of a device. + +![](/img/time-series-data-en-02.png) + +### 1.3 Device + +A device, also called an entity or equipment, is a device or apparatus with physical quantities in a real-world scenario. It can be a physical device, a measurement apparatus, or a collection of sensors. + +Common examples are as follows: + +| Scenario | Device Example | Identifier Example | +| --- | --- | --- | +| Energy | Wind turbine | Region, station, line, model, instance, etc. | +| Factory | Robotic arm | Unique ID generated by an IoT platform | +| Connected vehicle | Vehicle | Vehicle identification number (VIN) | +| Monitoring | CPU | Equipment room, rack, hostname, device type, etc. | + +### 1.4 Timeseries + +A timeseries can also be called a physical quantity, time series, timeline, signal, metric, point, or measured value. It is the measurement information recorded by a detection device in a real-world scenario. Usually, one physical quantity represents one collection point that can periodically collect a physical quantity from its environment or device. When the data points generated by a timeseries are arranged in ascending timestamp order, they form a time series. 
+ +Common examples are as follows: + +| Scenario | Timeseries Example | +| --- | --- | +| Energy and power | Current, voltage, wind speed, rotational speed | +| Connected vehicle | Fuel level, vehicle speed, longitude, latitude | +| Factory | Temperature, humidity | + +### 1.5 Data Point + +A data point consists of a timestamp and a value. The timestamp indicates when the data was generated, and the value indicates the collection result of the timeseries at that time. The value can be of various types, such as BOOLEAN, FLOAT, and INT32. + +A row in a tabular time series, or a point in a trend chart, can be understood as a data point. + +![](/img/time-series-data-en-03.png) + +### 1.6 Collection Frequency + +Collection frequency refers to the number of times a physical quantity generates data within a certain period. For example, if a temperature sensor collects temperature data once per second, its collection frequency is 1 Hz, that is, once per second. + +The higher the collection frequency, the more data points are generated per unit of time, and the higher the requirements for write, storage, and query capabilities. + +### 1.7 Data Retention Time (TTL) + +TTL specifies the retention time of data. Data beyond the TTL will be automatically deleted. + +Using TTL properly can control disk space usage, avoid exceptions such as disks becoming full, and help maintain query performance and reduce memory usage. + +### 1.8 Schema + +Schema is the data model information of a database and is used to describe the structure and definition of data. For the tree model, schema usually includes path hierarchy, devices, timeseries, data types, encoding, and compression methods. + +### 1.9 Encoding and Compression + +Encoding is a compression technique used to represent data in binary form and improve storage efficiency. Compression further compresses the encoded binary data to improve storage efficiency. 
+ +> For details about encoding and compression supported by IoTDB, see [Compression and Encoding](../Technical-Insider/Encoding-and-Compression.md). + +## 2. Common IoTDB Concepts + +This section introduces common concepts in the IoTDB tree model, distributed architecture, and deployment. These concepts explain how IoTDB organizes, manages, and deploys time series data by using hierarchical paths. + +### 2.1 Data Model Concepts + +#### 2.1.1 Data Model (sql_dialect) + +IoTDB supports two data models: tree model and table model. The core objects managed by both models are devices and timeseries, but their organization methods and syntax are different. + +- Tree model: Manages data through hierarchical paths, where one path corresponds to one timeseries of one device. + +- Table model: Manages data through relational tables. It is recommended that one table correspond to one type of device. + +Both model spaces can exist in the same cluster instance. Different models use different syntax and database naming methods, and are not visible to each other by default. + +#### 2.1.2 Database + +In the tree model, a database is a path segment prefixed with `root.` and can be understood as an upper-level management boundary for tree model data. During modeling, it is usually recommended to use only the first-level node under `root` as the database, such as `root.db`. + +Neither a parent node nor a child node of a database can be set as another database. A single database can already make full use of machine resources, so creating multiple databases for performance reasons is usually unnecessary. + +#### 2.1.3 Timeseries and Device + +A timeseries is a complete path that is prefixed with the database path and whose levels are separated by periods (`.`). It can contain any number of levels. Each timeseries can have its own data type, encoding method, and compression method. + +In the tree model, the penultimate level is usually regarded as the device. 
For example, in `root.db.turbine.device1.metric1`, the `device1` level is the device, and the complete path ending in `metric1` is the timeseries. A device cannot be created on its own; it usually comes into existence when its timeseries are created. + +During modeling, it is recommended to put only the tags that uniquely identify a timeseries into the path, generally no more than 10 levels. Put low-cardinality tags as early as possible so that the system can compress common prefixes. + +> If the number of devices is small but each device has many timeseries, you can add a level such as `.value` at the end so that the penultimate level has a sufficient number of nodes, for example, `root.db.device01.metric.value`. + +#### 2.1.4 Alias, Tag, and Attribute + +When creating a timeseries, you can add an alias, tags, and attributes to it. An alias is bound to the timeseries and can be used equivalently in scenarios where the original timeseries name is used. A temporary alias in an SQL query only replaces the name in the query result and is not bound to the timeseries. + +| Concept | Purpose | +| --- | --- | +| Alias | Bound to a timeseries and used instead of the original timeseries name for access | +| Tag | Can be used to query timeseries paths. The system maintains a "tag -> timeseries path" index | +| Attribute | Used to describe a timeseries. Attribute information can only be queried from the timeseries path | + +### 2.2 Distributed Concepts + +IoTDB supports cluster deployment. Common concepts in a cluster include nodes, Regions, and multiple replicas. A common cluster deployment mode is 3C3D, that is, 3 ConfigNodes and 3 DataNodes. + +![](/img/Cluster-Concept03N.png) + +#### 2.2.1 Node + +An IoTDB cluster includes three types of nodes: ConfigNode, DataNode, and AINode. + +- ConfigNode: Manages node information, configuration information, user permissions, schema, partition information, and other cluster information. It is responsible for scheduling distributed operations and load balancing. 
All ConfigNodes are full backups of each other. + +- DataNode: Serves client requests and is responsible for data storage and computation. + +- AINode: Provides machine learning capabilities. It supports registering trained machine learning models and invoking models for inference through SQL. + +#### 2.2.2 Data Partition (Region) + +In IoTDB, both schema and data are divided into smaller partitions, namely Regions, and are managed by DataNodes in the cluster. + +- SchemaRegion: A schema partition used to manage the schema of some devices and timeseries. + +- DataRegion: A data partition used to manage the data of some devices within a period of time. + +Regions with the same RegionID on different DataNodes are replicas of each other. + +#### 2.2.3 Multiple Replicas + +The number of replicas for data and schema is configurable. Multiple replicas can provide high-availability services. + +| Category | Configuration Item | Recommended Standalone Configuration | Recommended Cluster Configuration | +| --- | --- | --- | --- | +| Schema | schema_replication_factor | 1 | 3 | +| Data | data_replication_factor | 1 | 2 | + +### 2.3 Deployment Concepts + +IoTDB has two running modes: standalone mode and cluster mode. + +#### 2.3.1 Standalone Mode + +An IoTDB standalone instance includes 1 ConfigNode and 1 DataNode, that is, 1C1D. + +- Features: Easy for developers to install and deploy, with low deployment and maintenance costs and convenient operations. + +- Applicable scenarios: Scenarios with limited resources or low high-availability requirements, such as edge servers. + +- Deployment method: [Standalone deployment](../Deployment-and-Maintenance/Stand-Alone-Deployment_apache.md). + +#### 2.3.2 Cluster Mode + +An IoTDB cluster instance consists of 3 ConfigNodes and at least 3 DataNodes; the typical configuration is 3 ConfigNodes and 3 DataNodes, that is, 3C3D. When some nodes fail, the remaining nodes can still provide services externally, ensuring high availability of database services. 
Database performance can also be improved by adding nodes. + +- Features: High availability and high scalability. System performance can be improved by adding DataNodes. + +- Applicable scenarios: Enterprise application scenarios that require high availability and reliability. + +- Deployment method: [Cluster deployment](../Deployment-and-Maintenance/Cluster-Deployment_apache.md). + +#### 2.3.3 Feature Summary + +| Dimension | Standalone Mode | Cluster Mode | +| --- | --- | --- | +| Applicable scenarios | Edge deployment; low high-availability requirements | High-availability services; disaster recovery scenarios, etc. | +| Required number of machines | 1 | >= 3 | +| Safety and reliability | Cannot tolerate a single point of failure | High; can tolerate a single point of failure | +| Scalability | Can scale DataNodes to improve performance | Can scale DataNodes to improve performance | +| Performance | Can scale with the number of DataNodes | Can scale with the number of DataNodes | + +Standalone mode and cluster mode have similar deployment steps: ConfigNodes and DataNodes are added one by one. The differences are only in the number of replicas and the minimum number of nodes that can provide services. diff --git a/src/UserGuide/Master/Tree/Background-knowledge/Data-Model-and-Terminology_apache.md b/src/UserGuide/Master/Tree/Background-knowledge/Data-Model-and-Terminology_apache.md index b98bd5989..eef1bdbf4 100644 --- a/src/UserGuide/Master/Tree/Background-knowledge/Data-Model-and-Terminology_apache.md +++ b/src/UserGuide/Master/Tree/Background-knowledge/Data-Model-and-Terminology_apache.md @@ -25,7 +25,7 @@ This section introduces how to transform time series data application scenarios ## 1. Time Series Data Mode -Before designing an IoTDB data mode, it's essential to understand time series data and its underlying structure. 
For more details, refer to: [Time Series Data Mode](../Background-knowledge/Navigating_Time_Series_Data_apache.md) +Before designing an IoTDB data mode, it's essential to understand time series data and its underlying structure. For more details, refer to: [Basic Concepts](../Background-knowledge/Common-Concepts_apache.md) ## 2. Tree-Table Twin Mode in IoTDB diff --git a/src/UserGuide/Master/Tree/QuickStart/QuickStart_apache.md b/src/UserGuide/Master/Tree/QuickStart/QuickStart_apache.md index 8489bfb84..f70bc7b1b 100644 --- a/src/UserGuide/Master/Tree/QuickStart/QuickStart_apache.md +++ b/src/UserGuide/Master/Tree/QuickStart/QuickStart_apache.md @@ -47,7 +47,7 @@ This guide will assist you in quickly installing and deploying IoTDB. You can qu 1. Database Modeling Design: Database modeling is a crucial step in creating a database system, involving the design of data structures and relationships to ensure that the organization of data meets the needs of specific applications. The following documents will help you quickly understand IoTDB's modeling design: - - Introduction to Time Series Concepts: [Navigating Time Series Data](../Background-knowledge/Navigating_Time_Series_Data_apache.md) + - Introduction to Time Series Concepts: [Basic Concepts](../Background-knowledge/Common-Concepts_apache.md) - Introduction to Modeling Design:[Data Model and Terminology](../Background-knowledge/Data-Model-and-Terminology_apache.md) diff --git a/src/UserGuide/latest-Table/Background-knowledge/Common-Concepts_apache.md b/src/UserGuide/latest-Table/Background-knowledge/Common-Concepts_apache.md new file mode 100644 index 000000000..3df6d80d1 --- /dev/null +++ b/src/UserGuide/latest-Table/Background-knowledge/Common-Concepts_apache.md @@ -0,0 +1,182 @@ +# Basic Concepts + +## 1. 
General Time Series Database Concepts + +This section introduces basic concepts commonly used in time series databases, including time series data, time series, devices, timeseries or fields, data points, collection frequency, TTL, schema, encoding, and compression. + +### 1.1 Time Series Data + +In scenarios such as IoT, industrial production, energy and power, connected vehicles, and infrastructure monitoring, devices usually use sensors to continuously collect status data about themselves or their environment. For example, motors collect voltage and current, wind turbines collect blade speed, angular velocity, and power generation, vehicles collect longitude, latitude, speed, and fuel consumption, and bridges collect vibration frequency, deflection, and displacement. + +![](/img/time-series-data-en-01.png) + +The common feature of this type of data is that it is related to time: the same collection object continuously generates new records as time passes. Data that is continuously generated and recorded in chronological order is called time series data. + +### 1.2 Time Series + +In time series data scenarios, a collection point continuously generates data points over time. When these data points are arranged in ascending timestamp order, they form a time series. In table form, a time series can be represented as a data table made up of time and value. In graph form, a time series can be represented as a trend curve that changes over time, and can also be described figuratively as the "electrocardiogram" of a device. + +![](/img/time-series-data-en-02.png) + +### 1.3 Device + +A device, also called an entity or equipment, is a device or apparatus with physical quantities in a real-world scenario. It can be a physical device, a measurement apparatus, or a collection of sensors. + +Common examples are as follows: + +| Scenario | Device Example | Identifier Example | +| --- | --- | --- | +| Energy | Wind turbine | Region, station, line, model, instance, etc. 
| +| Factory | Robotic arm | Unique ID generated by an IoT platform | +| Connected vehicle | Vehicle | Vehicle identification number (VIN) | +| Monitoring | CPU | Equipment room, rack, hostname, device type, etc. | + +### 1.4 Timeseries / Field + +A timeseries or field can also be called a physical quantity, time series, timeline, signal, metric, point, or measured value. It is the measurement information recorded by a detection device in a real-world scenario. Usually, one physical quantity represents one collection point that can periodically collect a physical quantity from its environment or device. When the data points generated by a timeseries or field are arranged in ascending timestamp order, they form a time series. + +Common examples are as follows: + +| Scenario | Timeseries / Field Example | +| --- | --- | +| Energy and power | Current, voltage, wind speed, rotational speed | +| Connected vehicle | Fuel level, vehicle speed, longitude, latitude | +| Factory | Temperature, humidity | + +### 1.5 Data Point + +A data point consists of a timestamp and a value. The timestamp indicates when the data was generated, and the value indicates the collection result of the timeseries or field at that time. The value can be of various types, such as BOOLEAN, FLOAT, and INT32. + +A row in a tabular time series, or a point in a trend chart, can be understood as a data point. + +![](/img/time-series-data-en-03.png) + +### 1.6 Collection Frequency + +Collection frequency refers to the number of times a physical quantity generates data within a certain period. For example, if a temperature sensor collects temperature data once per second, its collection frequency is 1 Hz, that is, once per second. + +The higher the collection frequency, the more data points are generated per unit of time, and the higher the requirements for write, storage, and query capabilities. + +### 1.7 Data Retention Time (TTL) + +TTL specifies the retention time of data. 
Data beyond the TTL will be automatically deleted. + +Using TTL properly can control disk space usage, avoid exceptions such as disks becoming full, and help maintain query performance and reduce memory usage. + +### 1.8 Schema + +Schema is the data model information of a database and is used to describe the structure and definition of data. For time series data, schema usually includes devices, timeseries or fields, data types, and other information. + +### 1.9 Encoding and Compression + +Encoding is a compression technique used to represent data in binary form and improve storage efficiency. Compression further compresses the encoded binary data to improve storage efficiency. + +> For details about encoding and compression supported by IoTDB, see [Compression and Encoding](../Technical-Insider/Encoding-and-Compression.md). + +## 2. Common IoTDB Concepts + +This section introduces common concepts in IoTDB data models, distributed architecture, and deployment. These concepts explain how IoTDB organizes, manages, and deploys time series data. + +### 2.1 Data Model Concepts + +#### 2.1.1 Data Model (sql_dialect) + +IoTDB supports two data models: tree model and table model. The core objects managed by both models are devices and timeseries, but their organization methods and syntax are different. + +- Tree model: Manages data through hierarchical paths, where one path corresponds to one timeseries of one device. + +- Table model: Manages data through relational tables. It is recommended that one table correspond to one type of device. + +Both model spaces can exist in the same cluster instance. Different models use different syntax and database naming methods, and are not visible to each other by default. + +#### 2.1.2 Database + +In the table model, a database is the upper-level organizational structure and can manage multiple types of devices and their tables. Before creating tables, writing data, or querying data, you usually need to create a database first. 
+ +#### 2.1.3 Table + +In the table model, it is recommended that one table correspond to one type of device and be used to organize the time series data of that type of device. Devices of the same type usually have the same or similar sets of fields. + +#### 2.1.4 Time Column, Tag Column, Attribute Column, and Field Column + +Columns in the table model can be divided by purpose into time columns, tag columns, attribute columns, and field columns. + +| Concept | Description | +| --- | --- | +| Time column (TIME) | Each table must contain one time column whose data type is TIMESTAMP | +| Tag column (TAG) | Used to identify devices. It can serve as the composite primary key of devices and usually does not change over time | +| Attribute column (ATTRIBUTE) | Used to describe static attributes of devices. It does not change over time and can be updated or added | +| Field column (FIELD) | Used to store field values collected by devices. Values change over time | + +In terms of data filtering efficiency, the usual order can be understood as: time columns and tag columns first, then attribute columns, and finally field columns. + +### 2.2 Distributed Concepts + +IoTDB supports cluster deployment. Common concepts in a cluster include nodes, Regions, and multiple replicas. A common cluster deployment mode is 3C3D, that is, 3 ConfigNodes and 3 DataNodes. + +![](/img/Cluster-Concept03N.png) + +#### 2.2.1 Node + +An IoTDB cluster includes three types of nodes: ConfigNode, DataNode, and AINode. + +- ConfigNode: Manages node information, configuration information, user permissions, schema, partition information, and other cluster information. It is responsible for scheduling distributed operations and load balancing. All ConfigNodes are full backups of each other. + +- DataNode: Serves client requests and is responsible for data storage and computation. + +- AINode: Provides machine learning capabilities. 
It supports registering trained machine learning models and invoking models for inference through SQL. + +#### 2.2.2 Data Partition (Region) + +In IoTDB, both schema and data are divided into smaller partitions, namely Regions, and are managed by DataNodes in the cluster. + +- SchemaRegion: A schema partition used to manage the schema of some devices and timeseries or fields. + +- DataRegion: A data partition used to manage the data of some devices within a period of time. + +Regions with the same RegionID on different DataNodes are replicas of each other. + +#### 2.2.3 Multiple Replicas + +The number of replicas for data and schema is configurable. Multiple replicas can provide high-availability services. + +| Category | Configuration Item | Recommended Standalone Configuration | Recommended Cluster Configuration | +| --- | --- | --- | --- | +| Schema | schema_replication_factor | 1 | 3 | +| Data | data_replication_factor | 1 | 2 | + +### 2.3 Deployment Concepts + +IoTDB has two running modes: standalone mode and cluster mode. + +#### 2.3.1 Standalone Mode + +An IoTDB standalone instance includes 1 ConfigNode and 1 DataNode, that is, 1C1D. + +- Features: Easy for developers to install and deploy, with low deployment and maintenance costs and convenient operations. + +- Applicable scenarios: Scenarios with limited resources or low high-availability requirements, such as edge servers. + +- Deployment method: [Standalone deployment](../Deployment-and-Maintenance/Stand-Alone-Deployment_apache.md). + +#### 2.3.2 Cluster Mode + +An IoTDB cluster instance consists of 3 ConfigNodes and no fewer than 3 DataNodes, usually 3 DataNodes, that is, 3C3D. When some nodes fail, the remaining nodes can still provide services externally, ensuring high availability of database services. Database performance can also be improved by adding nodes. + +- Features: High availability and high scalability. System performance can be improved by adding DataNodes. 
+ +- Applicable scenarios: Enterprise application scenarios that require high availability and reliability. + +- Deployment method: [Cluster deployment](../Deployment-and-Maintenance/Cluster-Deployment_apache.md). + +#### 2.3.3 Feature Summary + +| Dimension | Standalone Mode | Cluster Mode | +| --- | --- | --- | +| Applicable scenarios | Edge deployment; low high-availability requirements | High-availability services; disaster recovery scenarios, etc. | +| Required number of machines | 1 | >= 3 | +| Safety and reliability | Cannot tolerate a single point of failure | High; can tolerate a single point of failure | +| Scalability | Can scale DataNodes to improve performance | Can scale DataNodes to improve performance | +| Performance | Can scale with the number of DataNodes | Can scale with the number of DataNodes | + +Standalone mode and cluster mode have similar deployment steps: ConfigNodes and DataNodes are added one by one. The differences are only in the number of replicas and the minimum number of nodes that can provide services. diff --git a/src/UserGuide/latest-Table/Background-knowledge/Data-Model-and-Terminology_apache.md b/src/UserGuide/latest-Table/Background-knowledge/Data-Model-and-Terminology_apache.md index ddf1626c0..a9c27cc9e 100644 --- a/src/UserGuide/latest-Table/Background-knowledge/Data-Model-and-Terminology_apache.md +++ b/src/UserGuide/latest-Table/Background-knowledge/Data-Model-and-Terminology_apache.md @@ -25,7 +25,7 @@ This section introduces how to transform time series data application scenarios ## 1. Time Series Data Mode -Before designing an IoTDB data mode, it's essential to understand time series data and its underlying structure. For more details, refer to: [Time Series Data Mode](../Background-knowledge/Navigating_Time_Series_Data_apache.md) +Before designing an IoTDB data mode, it's essential to understand time series data and its underlying structure. 
For more details, refer to: [Basic Concepts](../Background-knowledge/Common-Concepts_apache.md) ## 2. Tree-Table Twin Mode in IoTDB diff --git a/src/UserGuide/latest-Table/QuickStart/QuickStart_apache.md b/src/UserGuide/latest-Table/QuickStart/QuickStart_apache.md index 84ad616f5..d3cdf9a83 100644 --- a/src/UserGuide/latest-Table/QuickStart/QuickStart_apache.md +++ b/src/UserGuide/latest-Table/QuickStart/QuickStart_apache.md @@ -45,7 +45,7 @@ This guide will assist you in quickly installing and deploying IoTDB. You can qu 1. Database Modeling Design: Database modeling is a crucial step in creating a database system, involving the design of data structures and relationships to ensure that the organization of data meets the needs of specific applications. The following documents will help you quickly understand IoTDB's modeling design: - - Introduction to Time Series Concepts: [Navigating Time Series Data](../Background-knowledge/Navigating_Time_Series_Data_apache.md) + - Introduction to Time Series Concepts: [Basic Concepts](../Background-knowledge/Common-Concepts_apache.md) - Introduction to Modeling Design:[Data Model and Terminology](../Background-knowledge/Data-Model-and-Terminology_apache.md) diff --git a/src/UserGuide/latest/Background-knowledge/Common-Concepts_apache.md b/src/UserGuide/latest/Background-knowledge/Common-Concepts_apache.md new file mode 100644 index 000000000..2d28548ee --- /dev/null +++ b/src/UserGuide/latest/Background-knowledge/Common-Concepts_apache.md @@ -0,0 +1,187 @@ +# Basic Concepts + +## 1. General Time Series Database Concepts + +This section introduces basic concepts commonly used in time series databases, including time series data, time series, devices, timeseries, data points, collection frequency, TTL, schema, encoding, and compression. 
+ +### 1.1 Time Series Data + +In scenarios such as IoT, industrial production, energy and power, connected vehicles, and infrastructure monitoring, devices usually use sensors to continuously collect status data about themselves or their environment. For example, motors collect voltage and current, wind turbines collect blade speed, angular velocity, and power generation, vehicles collect longitude, latitude, speed, and fuel consumption, and bridges collect vibration frequency, deflection, and displacement. + +![](/img/time-series-data-en-01.png) + +The common feature of this type of data is that it is related to time: the same collection object continuously generates new records as time passes. Data that is continuously generated and recorded in chronological order is called time series data. + +### 1.2 Time Series + +In time series data scenarios, a collection point continuously generates data points over time. When these data points are arranged in ascending timestamp order, they form a time series. In table form, a time series can be represented as a data table made up of time and value. In graph form, a time series can be represented as a trend curve that changes over time, and can also be described figuratively as the "electrocardiogram" of a device. + +![](/img/time-series-data-en-02.png) + +### 1.3 Device + +A device, also called an entity or equipment, is a device or apparatus with physical quantities in a real-world scenario. It can be a physical device, a measurement apparatus, or a collection of sensors. + +Common examples are as follows: + +| Scenario | Device Example | Identifier Example | +| --- | --- | --- | +| Energy | Wind turbine | Region, station, line, model, instance, etc. | +| Factory | Robotic arm | Unique ID generated by an IoT platform | +| Connected vehicle | Vehicle | Vehicle identification number (VIN) | +| Monitoring | CPU | Equipment room, rack, hostname, device type, etc. 
| + +### 1.4 Timeseries + +A timeseries can also be called a physical quantity, time series, timeline, signal, metric, point, or measured value. It is the measurement information recorded by a detection device in a real-world scenario. Usually, one physical quantity represents one collection point that can periodically collect a physical quantity from its environment or device. When the data points generated by a timeseries are arranged in ascending timestamp order, they form a time series. + +Common examples are as follows: + +| Scenario | Timeseries Example | +| --- | --- | +| Energy and power | Current, voltage, wind speed, rotational speed | +| Connected vehicle | Fuel level, vehicle speed, longitude, latitude | +| Factory | Temperature, humidity | + +### 1.5 Data Point + +A data point consists of a timestamp and a value. The timestamp indicates when the data was generated, and the value indicates the collection result of the timeseries at that time. The value can be of various types, such as BOOLEAN, FLOAT, and INT32. + +A row in a tabular time series, or a point in a trend chart, can be understood as a data point. + +![](/img/time-series-data-en-03.png) + +### 1.6 Collection Frequency + +Collection frequency refers to the number of times a physical quantity generates data within a certain period. For example, if a temperature sensor collects temperature data once per second, its collection frequency is 1 Hz, that is, once per second. + +The higher the collection frequency, the more data points are generated per unit of time, and the higher the requirements for write, storage, and query capabilities. + +### 1.7 Data Retention Time (TTL) + +TTL specifies the retention time of data. Data beyond the TTL will be automatically deleted. + +Using TTL properly can control disk space usage, avoid exceptions such as disks becoming full, and help maintain query performance and reduce memory usage. 
+ +### 1.8 Schema + +Schema is the data model information of a database and is used to describe the structure and definition of data. For the tree model, schema usually includes path hierarchy, devices, timeseries, data types, encoding, and compression methods. + +### 1.9 Encoding and Compression + +Encoding is a compression technique used to represent data in binary form and improve storage efficiency. Compression further compresses the encoded binary data to improve storage efficiency. + +> For details about encoding and compression supported by IoTDB, see [Compression and Encoding](../Technical-Insider/Encoding-and-Compression.md). + +## 2. Common IoTDB Concepts + +This section introduces common concepts in the IoTDB tree model, distributed architecture, and deployment. These concepts explain how IoTDB organizes, manages, and deploys time series data by using hierarchical paths. + +### 2.1 Data Model Concepts + +#### 2.1.1 Data Model (sql_dialect) + +IoTDB supports two data models: tree model and table model. The core objects managed by both models are devices and timeseries, but their organization methods and syntax are different. + +- Tree model: Manages data through hierarchical paths, where one path corresponds to one timeseries of one device. + +- Table model: Manages data through relational tables. It is recommended that one table correspond to one type of device. + +Both model spaces can exist in the same cluster instance. Different models use different syntax and database naming methods, and are not visible to each other by default. + +#### 2.1.2 Database + +In the tree model, a database is a path segment prefixed with `root.` and can be understood as an upper-level management boundary for tree model data. During modeling, it is usually recommended to use only the first-level node under `root` as the database, such as `root.db`. + +Neither a parent node nor a child node of a database can be set as another database. 
A single database can already make full use of machine resources, so creating multiple databases for performance reasons is usually unnecessary. + +#### 2.1.3 Timeseries and Device + +A timeseries is a complete path that is prefixed with the database path and whose levels are separated by periods (`.`). It can contain any number of levels. Each timeseries can have its own data type, encoding method, and compression method. + +In the tree model, the penultimate level is usually regarded as the device. For example, in `root.db.turbine.device1.metric1`, the `device1` level is the device, and `metric1` is the timeseries. A device cannot be created on its own; it usually comes into existence when its timeseries are created. + +During modeling, it is recommended to put only the tags that uniquely identify a timeseries into the path, generally no more than 10 levels. Place low-cardinality tags as early in the path as possible so that the system can compress common prefixes. + +> If the number of devices is small but each device has many timeseries, you can add a level such as `.value` at the end so that the penultimate level has a sufficient number of nodes, for example, `root.db.device01.metric.value`. + +#### 2.1.4 Alias, Tag, and Attribute + +When creating a timeseries, you can add an alias, tags, and attributes to it. An alias is bound to the timeseries and can be used interchangeably wherever the original timeseries name is used. A temporary alias in an SQL query only replaces the name in the query result and is not bound to the timeseries. + +| Concept | Purpose | +| --- | --- | +| Alias | Bound to a timeseries and used instead of the original timeseries name for access | +| Tag | Can be used to query timeseries paths. The system maintains a "tag -> timeseries path" index | +| Attribute | Used to describe a timeseries. Attribute information can only be queried from the timeseries path | + +### 2.2 Distributed Concepts + +IoTDB supports cluster deployment.
Common concepts in a cluster include nodes, Regions, and multiple replicas. A common cluster deployment mode is 3C3D, that is, 3 ConfigNodes and 3 DataNodes. + +![](/img/Cluster-Concept03N.png) + +#### 2.2.1 Node + +An IoTDB cluster includes three types of nodes: ConfigNode, DataNode, and AINode. + +- ConfigNode: Manages node information, configuration information, user permissions, schema, partition information, and other cluster information. It is responsible for scheduling distributed operations and load balancing. All ConfigNodes are full backups of each other. + +- DataNode: Serves client requests and is responsible for data storage and computation. + +- AINode: Provides machine learning capabilities. It supports registering trained machine learning models and invoking models for inference through SQL. + +#### 2.2.2 Data Partition (Region) + +In IoTDB, both schema and data are divided into smaller partitions, namely Regions, and are managed by DataNodes in the cluster. + +- SchemaRegion: A schema partition used to manage the schema of some devices and timeseries. + +- DataRegion: A data partition used to manage the data of some devices within a period of time. + +Regions with the same RegionID on different DataNodes are replicas of each other. + +#### 2.2.3 Multiple Replicas + +The number of replicas for data and schema is configurable. Multiple replicas can provide high-availability services. + +| Category | Configuration Item | Recommended Standalone Configuration | Recommended Cluster Configuration | +| --- | --- | --- | --- | +| Schema | schema_replication_factor | 1 | 3 | +| Data | data_replication_factor | 1 | 2 | + +### 2.3 Deployment Concepts + +IoTDB has two running modes: standalone mode and cluster mode. + +#### 2.3.1 Standalone Mode + +An IoTDB standalone instance includes 1 ConfigNode and 1 DataNode, that is, 1C1D. + +- Features: Easy for developers to install and deploy, with low deployment and maintenance costs and convenient operations. 
+ +- Applicable scenarios: Scenarios with limited resources or low high-availability requirements, such as edge servers. + +- Deployment method: [Standalone deployment](../Deployment-and-Maintenance/Stand-Alone-Deployment_apache.md). + +#### 2.3.2 Cluster Mode + +An IoTDB cluster instance consists of 3 ConfigNodes and no fewer than 3 DataNodes, usually 3 DataNodes, that is, 3C3D. When some nodes fail, the remaining nodes can still provide services externally, ensuring high availability of database services. Database performance can also be improved by adding nodes. + +- Features: High availability and high scalability. System performance can be improved by adding DataNodes. + +- Applicable scenarios: Enterprise application scenarios that require high availability and reliability. + +- Deployment method: [Cluster deployment](../Deployment-and-Maintenance/Cluster-Deployment_apache.md). + +#### 2.3.3 Feature Summary + +| Dimension | Standalone Mode | Cluster Mode | +| --- | --- | --- | +| Applicable scenarios | Edge deployment; low high-availability requirements | High-availability services; disaster recovery scenarios, etc. | +| Required number of machines | 1 | >= 3 | +| Safety and reliability | Cannot tolerate a single point of failure | High; can tolerate a single point of failure | +| Scalability | Can scale DataNodes to improve performance | Can scale DataNodes to improve performance | +| Performance | Can scale with the number of DataNodes | Can scale with the number of DataNodes | + +Standalone mode and cluster mode have similar deployment steps: ConfigNodes and DataNodes are added one by one. The differences are only in the number of replicas and the minimum number of nodes that can provide services. 
diff --git a/src/UserGuide/latest/Background-knowledge/Data-Model-and-Terminology_apache.md b/src/UserGuide/latest/Background-knowledge/Data-Model-and-Terminology_apache.md index b98bd5989..eef1bdbf4 100644 --- a/src/UserGuide/latest/Background-knowledge/Data-Model-and-Terminology_apache.md +++ b/src/UserGuide/latest/Background-knowledge/Data-Model-and-Terminology_apache.md @@ -25,7 +25,7 @@ This section introduces how to transform time series data application scenarios ## 1. Time Series Data Mode -Before designing an IoTDB data mode, it's essential to understand time series data and its underlying structure. For more details, refer to: [Time Series Data Mode](../Background-knowledge/Navigating_Time_Series_Data_apache.md) +Before designing an IoTDB data mode, it's essential to understand time series data and its underlying structure. For more details, refer to: [Basic Concepts](../Background-knowledge/Common-Concepts_apache.md) ## 2. Tree-Table Twin Mode in IoTDB diff --git a/src/UserGuide/latest/QuickStart/QuickStart_apache.md b/src/UserGuide/latest/QuickStart/QuickStart_apache.md index 8489bfb84..f70bc7b1b 100644 --- a/src/UserGuide/latest/QuickStart/QuickStart_apache.md +++ b/src/UserGuide/latest/QuickStart/QuickStart_apache.md @@ -47,7 +47,7 @@ This guide will assist you in quickly installing and deploying IoTDB. You can qu 1. Database Modeling Design: Database modeling is a crucial step in creating a database system, involving the design of data structures and relationships to ensure that the organization of data meets the needs of specific applications. 
The following documents will help you quickly understand IoTDB's modeling design: - - Introduction to Time Series Concepts: [Navigating Time Series Data](../Background-knowledge/Navigating_Time_Series_Data_apache.md) + - Introduction to Time Series Concepts: [Basic Concepts](../Background-knowledge/Common-Concepts_apache.md) - Introduction to Modeling Design:[Data Model and Terminology](../Background-knowledge/Data-Model-and-Terminology_apache.md) diff --git a/src/zh/UserGuide/Master/Table/Background-knowledge/Common-Concepts_apache.md b/src/zh/UserGuide/Master/Table/Background-knowledge/Common-Concepts_apache.md new file mode 100644 index 000000000..0c4ab0c4a --- /dev/null +++ b/src/zh/UserGuide/Master/Table/Background-knowledge/Common-Concepts_apache.md @@ -0,0 +1,183 @@ +# 基础概念 + +## **1\. 时序数据库通用概念** + +本节介绍时序数据库领域中常见的基础概念,包括时序数据、时间序列、设备、测点、数据点、采集频率、TTL、元数据、编码和压缩。 + +### **1\.1 时序数据** + +在物联网、工业生产、能源电力、车联网、基础设施监测等场景中,设备通常会通过传感器持续采集自身或环境的状态数据。例如,电机采集电压和电流,风机采集叶片转速、角速度和发电功率,车辆采集经纬度、速度和油耗,桥梁采集振动频率、挠度和位移量。 + +![](/img/%E6%97%B6%E5%BA%8F%E6%95%B0%E6%8D%AE%E4%BB%8B%E7%BB%8D.png) + +这类数据的共同特点是与时间相关:同一采集对象会随着时间推移不断产生新的记录。按时间顺序持续产生并记录的数据,称为时序数据。 + +### **1\.2 时间序列** + +在时序数据场景中,一个采集点位会随着时间不断产生数据点,这些数据点按时间戳递增排列后,形成一条时间序列。从表格形式看,一条时间序列可以表示为由时间和值组成的数据表;从图形形式看,一条时间序列可以表示为随时间变化的趋势曲线。也可以形象地称之为设备的“心电图”。 + +![](/img/%E5%BF%83%E7%94%B5%E5%9B%BE1.png) + +### **1\.3 设备(Device)** + +设备也称为实体、装备等,是实际场景中拥有物理量的设备或装置,可以是物理设备、测量装置或传感器集合。 + +常见示例如下: + +|场景|设备示例|标识方式示例| +|---|---|---| +|能源场景|风机|区域、场站、线路、机型、实例等| +|工厂场景|机械臂|物联网平台生成的唯一 ID| +|车联网场景|车辆|车辆识别代码 VIN| +|监控场景|CPU|机房、机架、Hostname、设备类型等| + +### **1\.4 测点(Timeseries / Field)** + +测点也可称为物理量、时间序列、时间线、信号量、指标、点位或测量值,是实际场景中检测装置记录的测量信息。通常,一个物理量代表一个采集点位,能够定期采集所在环境或设备的物理量。一个测点随时间产生的数据点按时间戳递增排列后,即形成一条时间序列。 + +常见示例如下: + +|场景|测点示例| +|---|---| +|能源电力场景|电流、电压、风速、转速| +|车联网场景|油量、车速、经度、纬度| +|工厂场景|温度、湿度| + + +### **1\.5 数据点(Data Point)** + +数据点由一个时间戳和一个数值组成。时间戳用于表示数据产生的时间,数值用于表示该测点在该时间的采集结果。数值可以为 BOOLEAN、FLOAT、INT32 等多种类型。 +
+表格形式的时间序列中的一行,或趋势图中的一个点,都可以理解为一个数据点。 + +![](/img/DataPoint01.png) + +### **1\.6 采集频率(Frequency)** + +采集频率指物理量在一定时间内产生数据的次数。例如,一个温度传感器每秒采集一次温度数据,则采集频率为 1Hz,即每秒 1 次。 + +采集频率越高,单位时间内产生的数据点越多,对写入、存储和查询能力的要求也越高。 + +### **1\.7 数据保存时间(TTL)** + +TTL 用于指定数据的保存时间。超过 TTL 的数据将被自动删除。 + +合理使用 TTL 可以控制磁盘空间占用,避免磁盘写满等异常,并有助于维持查询性能、减少内存资源占用。 + +### **1\.8 元数据(Schema)** + +元数据是数据库的数据模型信息,用于描述数据的结构和定义。对于时序数据,元数据通常包括设备、测点、数据类型等信息。 + +### **1\.9 编码(Encoding)和压缩(Compression)** + +编码是一种压缩技术,用于将数据以二进制形式表示,从而提高存储效率。压缩是在编码后进一步压缩二进制数据,以提升存储效率。 + +> IoTDB 支持的编码和压缩详细信息请查看:[压缩和编码](../Technical-Insider/Encoding-and-Compression.md)。 + +## **2\. IoTDB 常见概念** + +本节介绍 IoTDB 数据模型、分布式和部署中的常见概念。这些概念用于说明 IoTDB 如何组织、管理和部署时序数据。 + +### **2\.1 数据模型相关概念** + +#### **2\.1\.1 数据模型(sql\_dialect)** + +IoTDB 支持树模型和表模型两种数据模型。两种模型管理的核心对象均为设备和测点,但组织方式和使用语法不同。 + +- 树模型:以层级路径的方式管理数据,一条路径对应一个设备的一个测点。 + +- 表模型:以关系表的方式管理数据,推荐一张表对应一类设备。 + +同一个集群实例中可以存在两种模型空间。不同模型的语法、数据库命名方式不同,默认不互相可见。 + +#### **2\.1\.2 数据库** + +在表模型中,数据库是表模型中的上层组织结构,可管理多类设备及其表。创建表、写入数据或查询数据前,通常需要先创建数据库。 + +#### **2\.1\.3 表** + +在表模型中,建模时推荐一张表对应一类设备,用于组织该类设备的时序数据。同类设备通常具有相同或相近的测点集合。 + +#### **2\.1\.4 时间列、标签列、属性列和测点列** + +表模型中的列按照用途可分为时间列、标签列、属性列和测点列。 + +|概念|说明| +|---|---| +|时间列(TIME)|每张表必须包含一个时间列,数据类型为 TIMESTAMP| +|标签列(TAG)|用于标识设备,可作为设备的联合主键,通常不随时间变化| +|属性列(ATTRIBUTE)|用于描述设备的静态属性,不随时间变化,可更新或新增| +|测点列(FIELD)|用于存储设备采集的测点值,值随时间变化| + +在数据筛选效率上,通常可理解为:时间列和标签列优先,其次是属性列,最后是测点列。 + +### **2\.2 分布式相关概念** + +IoTDB 支持以集群方式运行。集群中常见概念包括节点、Region 和多副本。一个常见的集群部署模式是 3C3D,即 3 个 ConfigNode 和 3 个 DataNode。 + +![](/img/Cluster-Concept01N.png) + +#### **2\.2\.1 节点** + +IoTDB 集群包括 ConfigNode、DataNode 和 AINode 三类节点。 + +- ConfigNode:管理集群的节点信息、配置信息、用户权限、元数据、分区信息等,负责分布式操作的调度和负载均衡。所有 ConfigNode 之间互为全量备份。 + +- DataNode:服务客户端请求,负责数据的存储和计算。 + +- AINode:提供机器学习能力,支持注册已训练好的机器学习模型,并通过 SQL 调用模型进行推理。 + +#### **2\.2\.2 数据分区(Region)** + +在 IoTDB 中,元数据和数据都被划分为较小的分区,即 Region,并由集群中的各个 DataNode 管理。 + +- SchemaRegion:元数据分区,用于管理一部分设备和测点的元数据。 + +- DataRegion:数据分区,用于管理一部分设备在一段时间内的数据。 + +不同 DataNode 上相同 
RegionID 的 Region 互为副本。 + +#### **2\.2\.3 多副本** + +数据和元数据的副本数可配置。多副本可提供高可用服务。 + +|类别|配置项|单机推荐配置|集群推荐配置| +|---|---|---|---| +|元数据|schema\_replication\_factor|1|3| +|数据|data\_replication\_factor|1|2| + +### **2\.3 部署相关概念** + +IoTDB 有两种运行模式:单机模式和集群模式。 + +#### **2\.3\.1 单机模式** + +IoTDB 单机实例包括 1 个 ConfigNode、1 个 DataNode,即 1C1D。 + +- **特点**:便于开发者安装部署,部署和维护成本较低,操作方便。 + +- **适用场景**:资源有限或对高可用要求不高的场景,例如边缘端服务器。 + +- **部署方法**:[单机版部署](../Deployment-and-Maintenance/Stand-Alone-Deployment_apache.md) + +#### **2\.3\.2 集群模式** + +IoTDB 集群实例为 3 个 ConfigNode 和不少于 3 个 DataNode,通常为 3 个 DataNode,即 3C3D;当部分节点出现故障时,剩余节点仍然能对外提供服务,保证数据库服务的高可用性,且可随节点增加提升数据库性能。 + +- **特点**:具有高可用性、高扩展性,可通过增加 DataNode 提高系统性能。 + +- **适用场景**:需要提供高可用和可靠性的企业级应用场景。 + +- **部署方法**:[集群版部署](../Deployment-and-Maintenance/Cluster-Deployment_apache.md) + +#### **2\.3\.3 特点总结** + +|维度|单机模式|集群模式| +|---|---|---| +|适用场景|边缘侧部署、对高可用要求不高|高可用性业务、容灾场景等| +|所需机器数量|1|≥3| +|安全可靠性|无法容忍单点故障|高,可容忍单点故障| +|扩展性|可扩展 DataNode 提升性能|可扩展 DataNode 提升性能| +|性能|可随 DataNode 数量扩展|可随 DataNode 数量扩展| + +- 单机模式和集群模式,部署步骤类似(逐个增加 ConfigNode 和 DataNode),仅副本数和可提供服务的最少节点数不同。 diff --git a/src/zh/UserGuide/Master/Table/Background-knowledge/Data-Model-and-Terminology_apache.md b/src/zh/UserGuide/Master/Table/Background-knowledge/Data-Model-and-Terminology_apache.md index 594884a24..fb8b22417 100644 --- a/src/zh/UserGuide/Master/Table/Background-knowledge/Data-Model-and-Terminology_apache.md +++ b/src/zh/UserGuide/Master/Table/Background-knowledge/Data-Model-and-Terminology_apache.md @@ -25,7 +25,7 @@ ## 1. 时序数据模型 -在构建IoTDB建模方案前,需要先了解时序数据和时序数据模型,详细内容见此页面:[时序数据模型](../Background-knowledge/Navigating_Time_Series_Data_apache.md) +在构建IoTDB建模方案前,需要先了解时序数据和时序数据模型,详细内容见此页面:[基础概念](../Background-knowledge/Common-Concepts_apache.md) ## 2.
IoTDB 的树表孪生模型 diff --git a/src/zh/UserGuide/Master/Table/QuickStart/QuickStart_apache.md b/src/zh/UserGuide/Master/Table/QuickStart/QuickStart_apache.md index ed73269c0..622df52a5 100644 --- a/src/zh/UserGuide/Master/Table/QuickStart/QuickStart_apache.md +++ b/src/zh/UserGuide/Master/Table/QuickStart/QuickStart_apache.md @@ -45,7 +45,7 @@ 1. 数据库建模设计:数据库建模是创建数据库系统的重要步骤,它涉及到设计数据的结构和关系,以确保数据的组织方式能够满足特定应用的需求,下面的文档将会帮助您快速了解 IoTDB 的建模设计: - - 时序概念介绍:[时序数据模型](../Background-knowledge/Navigating_Time_Series_Data_apache.md) + - 时序概念介绍:[基础概念](../Background-knowledge/Common-Concepts_apache.md) - 建模设计介绍:[建模方案设计](../Background-knowledge/Data-Model-and-Terminology_apache.md) diff --git a/src/zh/UserGuide/Master/Tree/Background-knowledge/Common-Concepts_apache.md b/src/zh/UserGuide/Master/Tree/Background-knowledge/Common-Concepts_apache.md new file mode 100644 index 000000000..4fa7c6741 --- /dev/null +++ b/src/zh/UserGuide/Master/Tree/Background-knowledge/Common-Concepts_apache.md @@ -0,0 +1,189 @@ +# 基础概念 + +## 1. 
时序数据库通用概念 + +本节介绍时序数据库领域中常见的基础概念,包括时序数据、时间序列、设备、测点、数据点、采集频率、TTL、元数据、编码和压缩。 + +### 1.1 时序数据 + +在物联网、工业生产、能源电力、车联网、基础设施监测等场景中,设备通常会通过传感器持续采集自身或环境的状态数据。例如,电机采集电压和电流,风机采集叶片转速、角速度和发电功率,车辆采集经纬度、速度和油耗,桥梁采集振动频率、挠度和位移量。 + +![](/img/%E6%97%B6%E5%BA%8F%E6%95%B0%E6%8D%AE%E4%BB%8B%E7%BB%8D.png) + +这类数据的共同特点是与时间相关:同一采集对象会随着时间推移不断产生新的记录。按时间顺序持续产生并记录的数据,称为时序数据。 + +### 1.2 时间序列 + +在时序数据场景中,一个采集点位会随着时间不断产生数据点,这些数据点按时间戳递增排列后,形成一条时间序列。从表格形式看,一条时间序列可以表示为由时间和值组成的数据表;从图形形式看,一条时间序列可以表示为随时间变化的趋势曲线,也可以形象地称为设备的“心电图”。 + +![](/img/%E5%BF%83%E7%94%B5%E5%9B%BE1.png) + +### 1.3 设备(Device) + +设备也称为实体、装备等,是实际场景中拥有物理量的设备或装置,可以是物理设备、测量装置或传感器集合。 + +常见示例如下: + +| 场景 | 设备示例 | 标识方式示例 | +| --- | --- | --- | +| 能源场景 | 风机 | 区域、场站、线路、机型、实例等 | +| 工厂场景 | 机械臂 | 物联网平台生成的唯一 ID | +| 车联网场景 | 车辆 | 车辆识别代码 VIN | +| 监控场景 | CPU | 机房、机架、Hostname、设备类型等 | + +### 1.4 测点(Timeseries) + +测点也可称为物理量、时间序列、时间线、信号量、指标、点位或测量值,是实际场景中检测装置记录的测量信息。通常,一个物理量代表一个采集点位,能够定期采集所在环境或设备的物理量。一个测点随时间产生的数据点按时间戳递增排列后,即形成一条时间序列。 + +常见示例如下: + +| 场景 | 测点示例 | +| --- | --- | +| 能源电力场景 | 电流、电压、风速、转速 | +| 车联网场景 | 油量、车速、经度、纬度 | +| 工厂场景 | 温度、湿度 | + +### 1.5 数据点(Data Point) + +数据点由一个时间戳和一个数值组成。时间戳用于表示数据产生的时间,数值用于表示该测点在该时间的采集结果。数值可以为 BOOLEAN、FLOAT、INT32 等多种类型。 + +表格形式的时间序列中的一行,或趋势图中的一个点,都可以理解为一个数据点。 + +![](/img/DataPoint01.png) + + +### 1.6 采集频率(Frequency) + +采集频率指物理量在一定时间内产生数据的次数。例如,一个温度传感器每秒采集一次温度数据,则采集频率为 1Hz,即每秒 1 次。 + +采集频率越高,单位时间内产生的数据点越多,对写入、存储和查询能力的要求也越高。 + +### 1.7 数据保存时间(TTL) + +TTL 用于指定数据的保存时间。超过 TTL 的数据将被自动删除。 + +合理使用 TTL 可以控制磁盘空间占用,避免磁盘写满等异常,并有助于维持查询性能、减少内存资源占用。 + +### 1.8 元数据(Schema) + +元数据是数据库的数据模型信息,用于描述数据的结构和定义。对于树模型,元数据通常包括路径层级、设备、测点、数据类型、编码和压缩方式等信息。 + +### 1.9 编码(Encoding)和压缩(Compression) + +编码是一种压缩技术,用于将数据以二进制形式表示,从而提高存储效率。压缩是在编码后进一步压缩二进制数据,以提升存储效率。 + +> IoTDB 支持的编码和压缩详细信息请查看:[压缩和编码](../Technical-Insider/Encoding-and-Compression.md)。 + +## 2. 
IoTDB 常见概念 + +本节介绍 IoTDB 树模型、分布式和部署中的常见概念。这些概念用于说明 IoTDB 如何以层级路径组织、管理和部署时序数据。 + +### 2.1 数据模型相关概念 + +#### 2.1.1 数据模型(sql_dialect) + +IoTDB 支持树模型和表模型两种数据模型。两种模型管理的核心对象均为设备和测点,但组织方式和使用语法不同。 + +- 树模型:以层级路径的方式管理数据,一条路径对应一个设备的一个测点。 + +- 表模型:以关系表的方式管理数据,推荐一张表对应一类设备。 + +同一个集群实例中可以存在两种模型空间。不同模型的语法、数据库命名方式不同,默认不互相可见。 + +#### 2.1.2 数据库 + +树模型中的数据库是以 root. 为前缀的一段路径,可理解为树模型数据的上层管理边界。建模时通常推荐仅使用 root 的下一级节点作为数据库,例如 root.db。 + +数据库的父节点或子节点不能再设置为数据库。一个数据库也可以充分利用机器资源,通常无需为了性能原因创建多个数据库。 + +#### 2.1.3 时间序列与设备 + +时间序列(测点)是以数据库路径为前缀、由英文句号(.)分割的完整路径,可包含任意多个层级。每条时间序列可以有独立的数据类型、编码方式和压缩方式。 + +树模型中通常将倒数第二级视为设备。例如 root.db.turbine.device1.metric1 中,device1 这一层级即为设备,metric1 为测点。设备无法单独创建,通常随时间序列创建而存在。 + +建模时,建议仅将能够唯一定位时间序列的标签放入路径中,一般不超过 10 层;低基数标签尽量放在前面,便于系统压缩公共前缀。 + +> 如果设备数量较少但单设备测点数量很多,可在末级增加 .value 等层级,使倒数第二层节点数量更充足,例如 root.db.device01.metric.value。 + +#### 2.1.4 别名、标签和属性 + +创建时间序列时,可以为测点添加别名、标签和属性。别名与测点绑定,可在使用原测点名的场景中等价使用;SQL 查询中的临时别名只替代本次查询结果中的名称,不与时间序列绑定。 + +| 概念 | 作用 | +| --- | --- | +| 别名(Alias) | 绑定到测点,用于替代原测点名进行访问 | +| 标签(Tag) | 可用于查询时间序列路径,系统维护“标签 -> 时间序列路径”的索引 | +| 属性(Attribute) | 用于描述时间序列,只能从时间序列路径查询属性信息 | + + +### 2.2 分布式相关概念 + +IoTDB 支持以集群方式运行。集群中常见概念包括节点、Region 和多副本。一个常见的集群部署模式是 3C3D,即 3 个 ConfigNode 和 3 个 DataNode。 + +![](/img/Cluster-Concept01N.png) + +#### 2.2.1 节点 + +IoTDB 集群包括 ConfigNode、DataNode 和 AINode 三类节点。 + +- ConfigNode:管理集群的节点信息、配置信息、用户权限、元数据、分区信息等,负责分布式操作的调度和负载均衡。所有 ConfigNode 之间互为全量备份。 + +- DataNode:服务客户端请求,负责数据的存储和计算。 + +- AINode:提供机器学习能力,支持注册已训练好的机器学习模型,并通过 SQL 调用模型进行推理。 + +#### 2.2.2 数据分区(Region) + +在 IoTDB 中,元数据和数据都被划分为较小的分区,即 Region,并由集群中的各个 DataNode 管理。 + +- SchemaRegion:元数据分区,用于管理一部分设备和测点的元数据。 + +- DataRegion:数据分区,用于管理一部分设备在一段时间内的数据。 + +不同 DataNode 上相同 RegionID 的 Region 互为副本。 + +#### 2.2.3 多副本 + +数据和元数据的副本数可配置。多副本可提供高可用服务。 + +| 类别 | 配置项 | 单机推荐配置 | 集群推荐配置 | +| --- | --- | --- | --- | +| 元数据 | schema_replication_factor | 1 | 3 | +| 数据 | data_replication_factor | 1 | 2 | + +### 2.3 部署相关概念 + +IoTDB 有两种运行模式:单机模式和集群模式。 + +#### 2.3.1 单机模式 + +IoTDB 单机实例包括 1 个 
ConfigNode、1 个 DataNode,即 1C1D。 + +- 特点:便于开发者安装部署,部署和维护成本较低,操作方便。 + +- 适用场景:资源有限或对高可用要求不高的场景,例如边缘端服务器。 + +- 部署方法:[单机版部署](../Deployment-and-Maintenance/Stand-Alone-Deployment_apache.md)。 + +#### 2.3.2 集群模式 + +IoTDB 集群实例为 3 个 ConfigNode 和不少于 3 个 DataNode,通常为 3 个 DataNode,即 3C3D;当部分节点出现故障时,剩余节点仍然能对外提供服务,保证数据库服务的高可用性,且可随节点增加提升数据库性能。 + +- 特点:具有高可用性、高扩展性,可通过增加 DataNode 提高系统性能。 + +- 适用场景:需要提供高可用和可靠性的企业级应用场景。 + +- 部署方法:[集群版部署](../Deployment-and-Maintenance/Cluster-Deployment_apache.md)。 + +#### 2.3.3 特点总结 + +| 维度 | 单机模式 | 集群模式 | +| --- | --- | --- | +| 适用场景 | 边缘侧部署、对高可用要求不高 | 高可用性业务、容灾场景等 | +| 所需机器数量 | 1 | ≥3 | +| 安全可靠性 | 无法容忍单点故障 | 高,可容忍单点故障 | +| 扩展性 | 可扩展 DataNode 提升性能 | 可扩展 DataNode 提升性能 | +| 性能 | 可随 DataNode 数量扩展 | 可随 DataNode 数量扩展 | + +单机模式和集群模式,部署步骤类似(逐个增加 ConfigNode 和 DataNode),仅副本数和可提供服务的最少节点数不同。 diff --git a/src/zh/UserGuide/Master/Tree/Background-knowledge/Data-Model-and-Terminology_apache.md b/src/zh/UserGuide/Master/Tree/Background-knowledge/Data-Model-and-Terminology_apache.md index 911a612f9..0584b3727 100644 --- a/src/zh/UserGuide/Master/Tree/Background-knowledge/Data-Model-and-Terminology_apache.md +++ b/src/zh/UserGuide/Master/Tree/Background-knowledge/Data-Model-and-Terminology_apache.md @@ -25,7 +25,7 @@ ## 1. 时序数据模型 -在构建IoTDB建模方案前,需要先了解时序数据和时序数据模型,详细内容见此页面:[时序数据模型](../Background-knowledge/Navigating_Time_Series_Data_apache.md) +在构建IoTDB建模方案前,需要先了解时序数据和时序数据模型,详细内容见此页面:[基础概念](../Background-knowledge/Common-Concepts_apache.md) ## 2. IoTDB 的树表孪生模型 diff --git a/src/zh/UserGuide/Master/Tree/QuickStart/QuickStart_apache.md b/src/zh/UserGuide/Master/Tree/QuickStart/QuickStart_apache.md index 4f2d8b947..8b2e3cdb1 100644 --- a/src/zh/UserGuide/Master/Tree/QuickStart/QuickStart_apache.md +++ b/src/zh/UserGuide/Master/Tree/QuickStart/QuickStart_apache.md @@ -45,7 +45,7 @@ 1. 
数据库建模设计:数据库建模是创建数据库系统的重要步骤,它涉及到设计数据的结构和关系,以确保数据的组织方式能够满足特定应用的需求,下面的文档将会帮助您快速了解 IoTDB 的建模设计: - - 时序概念介绍:[走进时序数据](../Background-knowledge/Navigating_Time_Series_Data_apache.md) + - 时序概念介绍:[基础概念](../Background-knowledge/Common-Concepts_apache.md) - 建模设计介绍:[数据模型介绍](../Background-knowledge/Data-Model-and-Terminology_apache.md) @@ -88,4 +88,4 @@ IoTDB 除了自身拥有丰富的功能外,其周边的工具体系包含的 ## 5. 使用过程中遇到问题? -如果您在安装或使用过程中遇到困难,可以移步至 [常见问题](../FAQ/Frequently-asked-questions.md) 中进行查看 \ No newline at end of file +如果您在安装或使用过程中遇到困难,可以移步至 [常见问题](../FAQ/Frequently-asked-questions.md) 中进行查看 diff --git a/src/zh/UserGuide/latest-Table/Background-knowledge/Common-Concepts_apache.md b/src/zh/UserGuide/latest-Table/Background-knowledge/Common-Concepts_apache.md new file mode 100644 index 000000000..0c4ab0c4a --- /dev/null +++ b/src/zh/UserGuide/latest-Table/Background-knowledge/Common-Concepts_apache.md @@ -0,0 +1,183 @@ +# 基础概念 + +## **1\. 时序数据库通用概念** + +本节介绍时序数据库领域中常见的基础概念,包括时序数据、时间序列、设备、测点、数据点、采集频率、TTL、元数据、编码和压缩。 + +### **1\.1 时序数据** + +在物联网、工业生产、能源电力、车联网、基础设施监测等场景中,设备通常会通过传感器持续采集自身或环境的状态数据。例如,电机采集电压和电流,风机采集叶片转速、角速度和发电功率,车辆采集经纬度、速度和油耗,桥梁采集振动频率、挠度和位移量。 + +![](/img/%E6%97%B6%E5%BA%8F%E6%95%B0%E6%8D%AE%E4%BB%8B%E7%BB%8D.png) + +这类数据的共同特点是与时间相关:同一采集对象会随着时间推移不断产生新的记录。按时间顺序持续产生并记录的数据,称为时序数据。 + +### **1\.2 时间序列** + +在时序数据场景中,一个采集点位会随着时间不断产生数据点,这些数据点按时间戳递增排列后,形成一条时间序列。从表格形式看,一条时间序列可以表示为由时间和值组成的数据表;从图形形式看,一条时间序列可以表示为随时间变化的趋势曲线,也可以形象地称为设备的“心电图”。 + +![](/img/%E5%BF%83%E7%94%B5%E5%9B%BE1.png) + +### **1\.3 设备(Device)** + +设备也称为实体、装备等,是实际场景中拥有物理量的设备或装置,可以是物理设备、测量装置或传感器集合。 + +常见示例如下: + +|场景|设备示例|标识方式示例| +|---|---|---| +|能源场景|风机|区域、场站、线路、机型、实例等| +|工厂场景|机械臂|物联网平台生成的唯一 ID| +|车联网场景|车辆|车辆识别代码 VIN| +|监控场景|CPU|机房、机架、Hostname、设备类型等| + +### **1\.4 测点(Timeseries / Field)** + +测点也可称为物理量、时间序列、时间线、信号量、指标、点位或测量值,是实际场景中检测装置记录的测量信息。通常,一个物理量代表一个采集点位,能够定期采集所在环境或设备的物理量。一个测点随时间产生的数据点按时间戳递增排列后,即形成一条时间序列。 + +常见示例如下: + +|场景|测点示例| +|---|---| +|能源电力场景|电流、电压、风速、转速| +|车联网场景|油量、车速、经度、纬度| +|工厂场景|温度、湿度| + + +### **1\.5 数据点(Data Point)** + 
+数据点由一个时间戳和一个数值组成。时间戳用于表示数据产生的时间,数值用于表示该测点在该时间的采集结果。数值可以为 BOOLEAN、FLOAT、INT32 等多种类型。 + +表格形式的时间序列中的一行,或趋势图中的一个点,都可以理解为一个数据点。 + +![](/img/DataPoint01.png) + +### **1\.6 采集频率(Frequency)** + +采集频率指物理量在一定时间内产生数据的次数。例如,一个温度传感器每秒采集一次温度数据,则采集频率为 1Hz,即每秒 1 次。 + +采集频率越高,单位时间内产生的数据点越多,对写入、存储和查询能力的要求也越高。 + +### **1\.7 数据保存时间(TTL)** + +TTL 用于指定数据的保存时间。超过 TTL 的数据将被自动删除。 + +合理使用 TTL 可以控制磁盘空间占用,避免磁盘写满等异常,并有助于维持查询性能、减少内存资源占用。 + +### **1\.8 元数据(Schema)** + +元数据是数据库的数据模型信息,用于描述数据的结构和定义。对于时序数据,元数据通常包括设备、测点、数据类型等信息。 + +### **1\.9 编码(Encoding)和压缩(Compression)** + +编码是一种压缩技术,用于将数据以二进制形式表示,从而提高存储效率。压缩是在编码后进一步压缩二进制数据,以提升存储效率。 + +> IoTDB 支持的编码和压缩详细信息请查看:[压缩和编码](../Technical-Insider/Encoding-and-Compression.md)。 + +## **2\. IoTDB 常见概念** + +本节介绍 IoTDB 数据模型、分布式和部署中的常见概念。这些概念用于说明 IoTDB 如何组织、管理和部署时序数据。 + +### **2\.1 数据模型相关概念** + +#### **2\.1\.1 数据模型(sql\_dialect)** + +IoTDB 支持树模型和表模型两种数据模型。两种模型管理的核心对象均为设备和测点,但组织方式和使用语法不同。 + +- 树模型:以层级路径的方式管理数据,一条路径对应一个设备的一个测点。 + +- 表模型:以关系表的方式管理数据,推荐一张表对应一类设备。 + +同一个集群实例中可以存在两种模型空间。不同模型的语法、数据库命名方式不同,默认不互相可见。 + +#### **2\.1\.2 数据库** + +在表模型中,数据库是表模型中的上层组织结构,可管理多类设备及其表。创建表、写入数据或查询数据前,通常需要先创建数据库。 + +#### **2\.1\.3 表** + +在表模型中,建模时推荐一张表对应一类设备,用于组织该类设备的时序数据。同类设备通常具有相同或相近的测点集合。 + +#### **2\.1\.4 时间列、标签列、属性列和测点列** + +表模型中的列按照用途可分为时间列、标签列、属性列和测点列。 + +|概念|说明| +|---|---| +|时间列(TIME)|每张表必须包含一个时间列,数据类型为 TIMESTAMP| +|标签列(TAG)|用于标识设备,可作为设备的联合主键,通常不随时间变化| +|属性列(ATTRIBUTE)|用于描述设备的静态属性,不随时间变化,可更新或新增| +|测点列(FIELD)|用于存储设备采集的测点值,值随时间变化| + +在数据筛选效率上,通常可理解为:时间列和标签列优先,其次是属性列,最后是测点列。 + +### **2\.2 分布式相关概念** + +IoTDB 支持以集群方式运行。集群中常见概念包括节点、Region 和多副本。一个常见的集群部署模式是 3C3D,即 3 个 ConfigNode 和 3 个 DataNode。 + +![](/img/Cluster-Concept01N.png) + +#### **2\.2\.1 节点** + +IoTDB 集群包括 ConfigNode、DataNode 和 AINode 三类节点。 + +- ConfigNode:管理集群的节点信息、配置信息、用户权限、元数据、分区信息等,负责分布式操作的调度和负载均衡。所有 ConfigNode 之间互为全量备份。 + +- DataNode:服务客户端请求,负责数据的存储和计算。 + +- AINode:提供机器学习能力,支持注册已训练好的机器学习模型,并通过 SQL 调用模型进行推理。 + +#### **2\.2\.2 数据分区(Region)** + +在 IoTDB 中,元数据和数据都被划分为较小的分区,即 Region,并由集群中的各个 DataNode 管理。 + +- 
SchemaRegion:元数据分区,用于管理一部分设备和测点的元数据。 + +- DataRegion:数据分区,用于管理一部分设备在一段时间内的数据。 + +不同 DataNode 上相同 RegionID 的 Region 互为副本。 + +#### **2\.2\.3 多副本** + +数据和元数据的副本数可配置。多副本可提供高可用服务。 + +|类别|配置项|单机推荐配置|集群推荐配置| +|---|---|---|---| +|元数据|schema\_replication\_factor|1|3| +|数据|data\_replication\_factor|1|2| + +### **2\.3 部署相关概念** + +IoTDB 有两种运行模式:单机模式和集群模式。 + +#### **2\.3\.1 单机模式** + +IoTDB 单机实例包括 1 个 ConfigNode、1 个 DataNode,即 1C1D。 + +- **特点**:便于开发者安装部署,部署和维护成本较低,操作方便。 + +- **适用场景**:资源有限或对高可用要求不高的场景,例如边缘端服务器。 + +- **部署方法**:[单机版部署](../Deployment-and-Maintenance/Stand-Alone-Deployment_apache.md) + +#### **2\.3\.2 集群模式** + +IoTDB 集群实例为 3 个 ConfigNode 和不少于 3 个 DataNode,通常为 3 个 DataNode,即 3C3D;当部分节点出现故障时,剩余节点仍然能对外提供服务,保证数据库服务的高可用性,且可随节点增加提升数据库性能。 + +- **特点**:具有高可用性、高扩展性,可通过增加 DataNode 提高系统性能。 + +- **适用场景**:需要提供高可用和可靠性的企业级应用场景。 + +- **部署方法**:[集群版部署](../Deployment-and-Maintenance/Cluster-Deployment_apache.md) + +#### **2\.3\.3 特点总结** + +|维度|单机模式|集群模式| +|---|---|---| +|适用场景|边缘侧部署、对高可用要求不高|高可用性业务、容灾场景等| +|所需机器数量|1|≥3| +|安全可靠性|无法容忍单点故障|高,可容忍单点故障| +|扩展性|可扩展 DataNode 提升性能|可扩展 DataNode 提升性能| +|性能|可随 DataNode 数量扩展|可随 DataNode 数量扩展| + +- 单机模式和集群模式,部署步骤类似(逐个增加 ConfigNode 和 DataNode),仅副本数和可提供服务的最少节点数不同。 diff --git a/src/zh/UserGuide/latest-Table/Background-knowledge/Data-Model-and-Terminology_apache.md b/src/zh/UserGuide/latest-Table/Background-knowledge/Data-Model-and-Terminology_apache.md index 594884a24..fb8b22417 100644 --- a/src/zh/UserGuide/latest-Table/Background-knowledge/Data-Model-and-Terminology_apache.md +++ b/src/zh/UserGuide/latest-Table/Background-knowledge/Data-Model-and-Terminology_apache.md @@ -25,7 +25,7 @@ ## 1. 时序数据模型 -在构建IoTDB建模方案前,需要先了解时序数据和时序数据模型,详细内容见此页面:[时序数据模型](../Background-knowledge/Navigating_Time_Series_Data_apache.md) +在构建IoTDB建模方案前,需要先了解时序数据和时序数据模型,详细内容见此页面:[基础概念](../Background-knowledge/Common-Concepts_apache.md) ## 2. 
IoTDB 的树表孪生模型 diff --git a/src/zh/UserGuide/latest-Table/QuickStart/QuickStart_apache.md b/src/zh/UserGuide/latest-Table/QuickStart/QuickStart_apache.md index ed73269c0..622df52a5 100644 --- a/src/zh/UserGuide/latest-Table/QuickStart/QuickStart_apache.md +++ b/src/zh/UserGuide/latest-Table/QuickStart/QuickStart_apache.md @@ -45,7 +45,7 @@ 1. 数据库建模设计:数据库建模是创建数据库系统的重要步骤,它涉及到设计数据的结构和关系,以确保数据的组织方式能够满足特定应用的需求,下面的文档将会帮助您快速了解 IoTDB 的建模设计: - - 时序概念介绍:[时序数据模型](../Background-knowledge/Navigating_Time_Series_Data_apache.md) + - 时序概念介绍:[基础概念](../Background-knowledge/Common-Concepts_apache.md) - 建模设计介绍:[建模方案设计](../Background-knowledge/Data-Model-and-Terminology_apache.md) diff --git a/src/zh/UserGuide/latest/Background-knowledge/Common-Concepts_apache.md b/src/zh/UserGuide/latest/Background-knowledge/Common-Concepts_apache.md new file mode 100644 index 000000000..4fa7c6741 --- /dev/null +++ b/src/zh/UserGuide/latest/Background-knowledge/Common-Concepts_apache.md @@ -0,0 +1,189 @@ +# 基础概念 + +## 1. 时序数据库通用概念 + +本节介绍时序数据库领域中常见的基础概念,包括时序数据、时间序列、设备、测点、数据点、采集频率、TTL、元数据、编码和压缩。 + +### 1.1 时序数据 + +在物联网、工业生产、能源电力、车联网、基础设施监测等场景中,设备通常会通过传感器持续采集自身或环境的状态数据。例如,电机采集电压和电流,风机采集叶片转速、角速度和发电功率,车辆采集经纬度、速度和油耗,桥梁采集振动频率、挠度和位移量。 + +![](/img/%E6%97%B6%E5%BA%8F%E6%95%B0%E6%8D%AE%E4%BB%8B%E7%BB%8D.png) + +这类数据的共同特点是与时间相关:同一采集对象会随着时间推移不断产生新的记录。按时间顺序持续产生并记录的数据,称为时序数据。 + +### 1.2 时间序列 + +在时序数据场景中,一个采集点位会随着时间不断产生数据点,这些数据点按时间戳递增排列后,形成一条时间序列。从表格形式看,一条时间序列可以表示为由时间和值组成的数据表;从图形形式看,一条时间序列可以表示为随时间变化的趋势曲线,也可以形象地称为设备的“心电图”。 + +![](/img/%E5%BF%83%E7%94%B5%E5%9B%BE1.png) + +### 1.3 设备(Device) + +设备也称为实体、装备等,是实际场景中拥有物理量的设备或装置,可以是物理设备、测量装置或传感器集合。 + +常见示例如下: + +| 场景 | 设备示例 | 标识方式示例 | +| --- | --- | --- | +| 能源场景 | 风机 | 区域、场站、线路、机型、实例等 | +| 工厂场景 | 机械臂 | 物联网平台生成的唯一 ID | +| 车联网场景 | 车辆 | 车辆识别代码 VIN | +| 监控场景 | CPU | 机房、机架、Hostname、设备类型等 | + +### 1.4 测点(Timeseries) + +测点也可称为物理量、时间序列、时间线、信号量、指标、点位或测量值,是实际场景中检测装置记录的测量信息。通常,一个物理量代表一个采集点位,能够定期采集所在环境或设备的物理量。一个测点随时间产生的数据点按时间戳递增排列后,即形成一条时间序列。 + +常见示例如下: + +| 场景 | 测点示例 | +| --- | --- | 
+| 能源电力场景 | 电流、电压、风速、转速 | +| 车联网场景 | 油量、车速、经度、纬度 | +| 工厂场景 | 温度、湿度 | + +### 1.5 数据点(Data Point) + +数据点由一个时间戳和一个数值组成。时间戳用于表示数据产生的时间,数值用于表示该测点在该时间的采集结果。数值可以为 BOOLEAN、FLOAT、INT32 等多种类型。 + +表格形式的时间序列中的一行,或趋势图中的一个点,都可以理解为一个数据点。 + +![](/img/DataPoint01.png) + + +### 1.6 采集频率(Frequency) + +采集频率指物理量在一定时间内产生数据的次数。例如,一个温度传感器每秒采集一次温度数据,则采集频率为 1Hz,即每秒 1 次。 + +采集频率越高,单位时间内产生的数据点越多,对写入、存储和查询能力的要求也越高。 + +### 1.7 数据保存时间(TTL) + +TTL 用于指定数据的保存时间。超过 TTL 的数据将被自动删除。 + +合理使用 TTL 可以控制磁盘空间占用,避免磁盘写满等异常,并有助于维持查询性能、减少内存资源占用。 + +### 1.8 元数据(Schema) + +元数据是数据库的数据模型信息,用于描述数据的结构和定义。对于树模型,元数据通常包括路径层级、设备、测点、数据类型、编码和压缩方式等信息。 + +### 1.9 编码(Encoding)和压缩(Compression) + +编码是一种压缩技术,用于将数据以二进制形式表示,从而提高存储效率。压缩是在编码后进一步压缩二进制数据,以提升存储效率。 + +> IoTDB 支持的编码和压缩详细信息请查看:[压缩和编码](../Technical-Insider/Encoding-and-Compression.md)。 + +## 2. IoTDB 常见概念 + +本节介绍 IoTDB 树模型、分布式和部署中的常见概念。这些概念用于说明 IoTDB 如何以层级路径组织、管理和部署时序数据。 + +### 2.1 数据模型相关概念 + +#### 2.1.1 数据模型(sql_dialect) + +IoTDB 支持树模型和表模型两种数据模型。两种模型管理的核心对象均为设备和测点,但组织方式和使用语法不同。 + +- 树模型:以层级路径的方式管理数据,一条路径对应一个设备的一个测点。 + +- 表模型:以关系表的方式管理数据,推荐一张表对应一类设备。 + +同一个集群实例中可以存在两种模型空间。不同模型的语法、数据库命名方式不同,默认不互相可见。 + +#### 2.1.2 数据库 + +树模型中的数据库是以 root. 
为前缀的一段路径,可理解为树模型数据的上层管理边界。建模时通常推荐仅使用 root 的下一级节点作为数据库,例如 root.db。 + +数据库的父节点或子节点不能再设置为数据库。一个数据库也可以充分利用机器资源,通常无需为了性能原因创建多个数据库。 + +#### 2.1.3 时间序列与设备 + +时间序列(测点)是以数据库路径为前缀、由英文句号(.)分割的完整路径,可包含任意多个层级。每条时间序列可以有独立的数据类型、编码方式和压缩方式。 + +树模型中通常将倒数第二级视为设备。例如 root.db.turbine.device1.metric1 中,device1 这一层级即为设备,metric1 为测点。设备无法单独创建,通常随时间序列创建而存在。 + +建模时,建议仅将能够唯一定位时间序列的标签放入路径中,一般不超过 10 层;低基数标签尽量放在前面,便于系统压缩公共前缀。 + +> 如果设备数量较少但单设备测点数量很多,可在末级增加 .value 等层级,使倒数第二层节点数量更充足,例如 root.db.device01.metric.value。 + +#### 2.1.4 别名、标签和属性 + +创建时间序列时,可以为测点添加别名、标签和属性。别名与测点绑定,可在使用原测点名的场景中等价使用;SQL 查询中的临时别名只替代本次查询结果中的名称,不与时间序列绑定。 + +| 概念 | 作用 | +| --- | --- | +| 别名(Alias) | 绑定到测点,用于替代原测点名进行访问 | +| 标签(Tag) | 可用于查询时间序列路径,系统维护“标签 -> 时间序列路径”的索引 | +| 属性(Attribute) | 用于描述时间序列,只能从时间序列路径查询属性信息 | + + +### 2.2 分布式相关概念 + +IoTDB 支持以集群方式运行。集群中常见概念包括节点、Region 和多副本。一个常见的集群部署模式是 3C3D,即 3 个 ConfigNode 和 3 个 DataNode。 + +![](/img/Cluster-Concept01N.png) + +#### 2.2.1 节点 + +IoTDB 集群包括 ConfigNode、DataNode 和 AINode 三类节点。 + +- ConfigNode:管理集群的节点信息、配置信息、用户权限、元数据、分区信息等,负责分布式操作的调度和负载均衡。所有 ConfigNode 之间互为全量备份。 + +- DataNode:服务客户端请求,负责数据的存储和计算。 + +- AINode:提供机器学习能力,支持注册已训练好的机器学习模型,并通过 SQL 调用模型进行推理。 + +#### 2.2.2 数据分区(Region) + +在 IoTDB 中,元数据和数据都被划分为较小的分区,即 Region,并由集群中的各个 DataNode 管理。 + +- SchemaRegion:元数据分区,用于管理一部分设备和测点的元数据。 + +- DataRegion:数据分区,用于管理一部分设备在一段时间内的数据。 + +不同 DataNode 上相同 RegionID 的 Region 互为副本。 + +#### 2.2.3 多副本 + +数据和元数据的副本数可配置。多副本可提供高可用服务。 + +| 类别 | 配置项 | 单机推荐配置 | 集群推荐配置 | +| --- | --- | --- | --- | +| 元数据 | schema_replication_factor | 1 | 3 | +| 数据 | data_replication_factor | 1 | 2 | + +### 2.3 部署相关概念 + +IoTDB 有两种运行模式:单机模式和集群模式。 + +#### 2.3.1 单机模式 + +IoTDB 单机实例包括 1 个 ConfigNode、1 个 DataNode,即 1C1D。 + +- 特点:便于开发者安装部署,部署和维护成本较低,操作方便。 + +- 适用场景:资源有限或对高可用要求不高的场景,例如边缘端服务器。 + +- 部署方法:[单机版部署](../Deployment-and-Maintenance/Stand-Alone-Deployment_apache.md)。 + +#### 2.3.2 集群模式 + +IoTDB 集群实例为 3 个 ConfigNode 和不少于 3 个 DataNode,通常为 3 个 DataNode,即 3C3D;当部分节点出现故障时,剩余节点仍然能对外提供服务,保证数据库服务的高可用性,且可随节点增加提升数据库性能。 + +- 
特点:具有高可用性、高扩展性,可通过增加 DataNode 提高系统性能。 + +- 适用场景:需要提供高可用和可靠性的企业级应用场景。 + +- 部署方法:[集群版部署](../Deployment-and-Maintenance/Cluster-Deployment_apache.md)。 + +#### 2.3.3 特点总结 + +| 维度 | 单机模式 | 集群模式 | +| --- | --- | --- | +| 适用场景 | 边缘侧部署、对高可用要求不高 | 高可用性业务、容灾场景等 | +| 所需机器数量 | 1 | ≥3 | +| 安全可靠性 | 无法容忍单点故障 | 高,可容忍单点故障 | +| 扩展性 | 可扩展 DataNode 提升性能 | 可扩展 DataNode 提升性能 | +| 性能 | 可随 DataNode 数量扩展 | 可随 DataNode 数量扩展 | + +单机模式和集群模式,部署步骤类似(逐个增加 ConfigNode 和 DataNode),仅副本数和可提供服务的最少节点数不同。 diff --git a/src/zh/UserGuide/latest/Background-knowledge/Data-Model-and-Terminology_apache.md b/src/zh/UserGuide/latest/Background-knowledge/Data-Model-and-Terminology_apache.md index 911a612f9..0584b3727 100644 --- a/src/zh/UserGuide/latest/Background-knowledge/Data-Model-and-Terminology_apache.md +++ b/src/zh/UserGuide/latest/Background-knowledge/Data-Model-and-Terminology_apache.md @@ -25,7 +25,7 @@ ## 1. 时序数据模型 -在构建IoTDB建模方案前,需要先了解时序数据和时序数据模型,详细内容见此页面:[时序数据模型](../Background-knowledge/Navigating_Time_Series_Data_apache.md) +在构建IoTDB建模方案前,需要先了解时序数据和时序数据模型,详细内容见此页面:[基础概念](../Background-knowledge/Common-Concepts_apache.md) ## 2. IoTDB 的树表孪生模型 diff --git a/src/zh/UserGuide/latest/QuickStart/QuickStart_apache.md b/src/zh/UserGuide/latest/QuickStart/QuickStart_apache.md index 4f2d8b947..8b2e3cdb1 100644 --- a/src/zh/UserGuide/latest/QuickStart/QuickStart_apache.md +++ b/src/zh/UserGuide/latest/QuickStart/QuickStart_apache.md @@ -45,7 +45,7 @@ 1. 数据库建模设计:数据库建模是创建数据库系统的重要步骤,它涉及到设计数据的结构和关系,以确保数据的组织方式能够满足特定应用的需求,下面的文档将会帮助您快速了解 IoTDB 的建模设计: - - 时序概念介绍:[走进时序数据](../Background-knowledge/Navigating_Time_Series_Data_apache.md) + - 时序概念介绍:[基础概念](../Background-knowledge/Common-Concepts_apache.md) - 建模设计介绍:[数据模型介绍](../Background-knowledge/Data-Model-and-Terminology_apache.md) @@ -88,4 +88,4 @@ IoTDB 除了自身拥有丰富的功能外,其周边的工具体系包含的 ## 5. 使用过程中遇到问题? -如果您在安装或使用过程中遇到困难,可以移步至 [常见问题](../FAQ/Frequently-asked-questions.md) 中进行查看 \ No newline at end of file +如果您在安装或使用过程中遇到困难,可以移步至 [常见问题](../FAQ/Frequently-asked-questions.md) 中进行查看