电商项目_ods层建设
2022/7/5 23:51:35
本文主要是介绍电商项目_ods层建设,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
ODS贴源层
1 ODS层存放您从业务系统获取的最原始的数据,是其他上层数据的源数据。 2 本项目中使用的ODS层数据主要包括: 用户基本信息、商品分类信息、商品信息、店铺信息、 订单数据、订单支付信息、活动信息、行为日志信息
业务数据库表介绍
中文含义 | MYSQL表名 | HIVE表名 |
---|---|---|
用户基本信息表 | nshop.customer | ods_nshop.ods_nshop_02_customer |
收货地址管理表 | nshop.customer_consignee | ods_nshop.ods_nshop_02_customer_consignee |
订单表 | nshop.orders | ods_nshop.ods_nshop_02_orders |
订单详情表 | nshop.order_detail | ods_nshop.ods_nshop_02_order_detail |
订单支付记录表 | nshop.orders_pay_records | ods_nshop.ods_nshop_02_orders_pay_records |
用户相关
- 用户基本信息表
create table if not exists nshop.customer ( customer_id varchar(20) NOT NULL COMMENT '用户ID', customer_login varchar(20) NOT NULL COMMENT '用户登录名', customer_nickname varchar(10) NOT NULL COMMENT '用户名(昵称)', customer_name varchar(10) NOT NULL COMMENT '用户真实姓名', customer_pass varchar(8) NOT NULL COMMENT '用户密码', customer_mobile varchar(20) NOT NULL COMMENT '用户手机', customer_idcard varchar(20) NOT NULL COMMENT '身份证', customer_gender TINYINT NOT NULL COMMENT '性别:1男 0女', customer_birthday varchar(10) NOT NULL COMMENT '出生年月', customer_age TINYINT NOT NULL COMMENT '年龄', customer_age_range varchar(2) NOT NULL COMMENT '年龄段', customer_email varchar(50) NULL COMMENT '用户邮箱', customer_natives varchar(10) NULL COMMENT '所在地区', customer_ctime bigint NULL COMMENT '创建时间', customer_utime bigint NULL COMMENT '修改时间', customer_device_num varchar(20) NOT NULL COMMENT '用户手机设备号', PRIMARY KEY (`customer_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 收货地址管理表
create table if not exists nshop.customer_consignee ( consignee_id varchar(20) NOT NULL COMMENT '收货地址ID', customer_id varchar(20) NOT NULL COMMENT '用户ID', consignee_name varchar(10) NOT NULL COMMENT '收货人', consignee_mobile varchar(15) NOT NULL COMMENT '收货人电话', consignee_zipcode varchar(10) NOT NULL COMMENT '收货人地区', consignee_addr varchar(200) NOT NULL COMMENT '收货人详细地址', consignee_tag varchar(10) NOT NULL COMMENT '标签:1家 2公司 3学校', ctime bigint NULL COMMENT '创建时间', PRIMARY KEY (`consignee_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `customer_attention` ( `customer_id` varchar(20) NOT NULL, `attention_id` varchar(20) NOT NULL, `attention_type` tinyint(4) NOT NULL, `attention_status` int(11) NOT NULL DEFAULT '1', `attention_ctime` bigint(20) NOT NULL, KEY `customer_id` (`customer_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8
商品相关
- 商品分类信息
create table if not exists nshop.category( category_code VARCHAR(10) NOT NULL COMMENT '分类编码', category_name VARCHAR(10) NOT NULL COMMENT '分类名称', category_parent_id VARCHAR(10) NULL COMMENT '父分类ID', category_status TINYINT NOT NULL DEFAULT 1 COMMENT '分类状态:0禁止,1启用', category_utime bigint NULL COMMENT '最后修改时间', PRIMARY KEY (`category_code`) )ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 供应商信息表
create table if not exists nshop.supplier( supplier_code varchar(10) NOT NULL COMMENT '供应商编码', supplier_name varchar(30) NOT NULL COMMENT '供应商名称', supplier_type TINYINT NOT NULL COMMENT '供应商类型:1.自营,2.官方 3其他', supplier_status TINYINT NOT NULL DEFAULT 1 COMMENT '状态:0禁止,1启用', supplier_utime bigint NULL COMMENT '最后修改时间', PRIMARY KEY (`supplier_code`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 商品信息表
create table if not exists nshop.product( product_code VARCHAR(20) NOT NULL COMMENT '商品ID(分类编码+供应商编码+编号)', product_name VARCHAR(20) NOT NULL COMMENT '商品名称', product_remark TEXT NOT NULL COMMENT '商品描述', category_code VARCHAR(10) NOT NULL COMMENT '分类ID', supplier_code varchar(10) NOT NULL COMMENT '商品的供应商编码', product_price DECIMAL(5,1) NOT NULL COMMENT '商品销售价格', product_weighing_cost DECIMAL(2,1) NOT NULL COMMENT '商品加权价格', product_publish_status TINYINT NOT NULL DEFAULT 0 COMMENT '上下架状态:0下架 1上架', product_audit_status TINYINT NOT NULL DEFAULT 0 COMMENT '审核状态:0未审核,1已审核', product_bar_code VARCHAR(50) NOT NULL COMMENT '国条码', product_weight FLOAT NULL COMMENT '商品重量', product_length FLOAT NULL COMMENT '商品长度', product_height FLOAT NULL COMMENT '商品高度', product_width FLOAT NULL COMMENT '商品宽度', product_colors SMALLINT UNSIGNED NOT NULL COMMENT '0:白|1:赤|2:红|3:黄|4:绿|5:青|6:蓝|7:紫|8:黑|9:彩色', product_date varchar(10) NOT NULL COMMENT '生产日期', product_shelf_life INT NOT NULL COMMENT '商品有效期', product_ctime bigint NULL COMMENT '商品录入时间', product_utime bigint NULL COMMENT '最后修改时间', PRIMARY KEY (`product_code`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
订单相关
- 订单表
create table if not exists nshop.orders ( order_id VARCHAR(50) NOT NULL COMMENT '订单ID(时间+商品ID+4位随机)', customer_id varchar(20) NOT NULL COMMENT '下单用户ID', order_status TINYINT NOT NULL COMMENT '订单状态:1已提交; 2待支付 3出货中 4已发货 5已收货(完成) 6投诉 7退货', customer_ip varchar(20) NULL COMMENT '下单用户IP', user_longitude varchar(20) NULL COMMENT '用户地理:经度', user_latitude varchar(20) NULL COMMENT '用户地理:纬度', user_areacode varchar(10) NULL COMMENT '用户所在地区', consignee_name varchar(10) NOT NULL COMMENT '收货人', consignee_mobile varchar(15) NOT NULL COMMENT '收货人电话', consignee_zipcode varchar(10) NOT NULL COMMENT '收货人地址', pay_type varchar(5) NULL COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ', pay_code varchar(30) NULL COMMENT '支付对应唯一标识,如微信号、支付宝号', pay_nettype varchar(1) NOT NULL COMMENT '支付网络方式:0 wifi | 1 4g | 2 3g |3 线下支付', district_money DECIMAL(8,1) NOT NULL DEFAULT 0.0 COMMENT '优惠金额', shipping_money DECIMAL(8,1) NOT NULL DEFAULT 0.0 COMMENT '运费金额', payment_money DECIMAL(10,1) NOT NULL DEFAULT 0.0 COMMENT '支付金额', order_ctime bigint NULL COMMENT '创建时间', shipping_time bigint NULL COMMENT '发货时间', receive_time bigint NULL COMMENT '收货时间', PRIMARY KEY (`order_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 订单详情表
create table if not exists nshop.order_detail( order_detail_id varchar(20) NOT NULL COMMENT '订单详情表ID', order_id VARCHAR(50) NOT NULL COMMENT '订单表ID', product_id varchar(20) NOT NULL COMMENT '订单商品ID', product_name VARCHAR(50) NOT NULL COMMENT '商品名称', product_remark VARCHAR(100) NOT NULL COMMENT '商品描述', product_cnt INT NOT NULL DEFAULT 1 COMMENT '购买商品数量', product_price DECIMAL(5,1) NOT NULL COMMENT '购买商品单价', weighing_cost DECIMAL(2,1) NOT NULL COMMENT '商品加权价格', district_money DECIMAL(4,1) NOT NULL DEFAULT 0.0 COMMENT '优惠金额', is_activity NOT NULL DEFAULT 0 COMMENT '1:参加活动|0:没有参加活动', order_detail_ctime bigint NULL COMMENT '下单时间', PRIMARY KEY (`order_detail_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 订单支付记录
create table if not exists nshop.orders_pay_records ( pay_id varchar(20) NOT NULL COMMENT '支付记录ID', order_id varchar(30) NOT NULL COMMENT '订单ID', customer_id varchar(20) NOT NULL COMMENT '用户ID', pay_status varchar(5) NOT NULL COMMENT '支付状态:0 支付失败| 1 支付成功', pay_type varchar(5) NULL COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ', pay_code varchar(30) NULL COMMENT '支付对应唯一标识,如微信号、支付宝号', pay_nettype varchar(1) NOT NULL COMMENT '支付网络方式:1 wifi | 2 4g | 3 3g |4 线下支付', pay_amount double(10,1) NOT NULL COMMENT '支付金额', pay_ctime bigint NULL COMMENT '创建时间', PRIMARY KEY (`pay_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
时间维度
create table if not exists nshop.date_dim ( date_day varchar(30) NOT NULL COMMENT '日期:yyyyMMdd 如20190520', date_day_desc varchar(30) NOT NULL COMMENT '日期格式:yyyy年MM月dd日 如2019年05月20日', date_day_month varchar(30) NOT NULL COMMENT '日期:20 本月第几天 如2019年5月20日为5月第20天', date_day_year varchar(30) NOT NULL COMMENT '日期:139 本年第几天 如2019年5月20日为139天', date_day_en varchar(30) NOT NULL COMMENT '日期:monday 星期几', date_week varchar(30) NOT NULL COMMENT '周:本月第几周 4 如 20190520为本月第4周', date_week_desc varchar(30) NOT NULL COMMENT '周:本月第几周 如 20190504', date_month varchar(30) NOT NULL COMMENT '月: 5 如 20190520为本年5月', date_month_en varchar(30) NOT NULL COMMENT '月: may ', date_month_desc varchar(30) NOT NULL COMMENT '月:如 201905', date_quarter varchar(30) NOT NULL COMMENT '季度:2', date_quarter_en varchar(30) NOT NULL COMMENT '季度:Q2', date_quarter_desc varchar(30) NOT NULL COMMENT '季度:2019Q2 如 20190520为2019Q2', date_year varchar(30) NOT NULL COMMENT '年:2019', PRIMARY KEY (`date_day`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
地区列表
create table if not exists nshop.area_dim ( region_code varchar(30) NOT NULL COMMENT '地区编码 如110105 | 130406 ', region_code_desc varchar(30) NOT NULL COMMENT '地区编码 如朝阳区 | 峰峰矿区', region_city varchar(30) NOT NULL COMMENT '地区编码 如1101 北京市朝阳区 | 1304 邯郸', region_city_desc varchar(30) NOT NULL COMMENT '地区编码 如1101 | 1304 邯郸市', region_province varchar(30) NOT NULL COMMENT '地区编码 如11 北京市 | 13 河北省', region_province_desc varchar(30) NOT NULL COMMENT '地区编码 如 北京市 | 河北', PRIMARY KEY (`region_code`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
通用字典表
create table if not exists nshop.comm_dim( dim_type VARCHAR(10) not NULL COMMENT '字典类型', dim_code VARCHAR(30) not NULL COMMENT '字典编码', dim_remark varchar(30) NULL COMMENT '字段描述', dim_ext1 varchar(30) NULL COMMENT '扩展1', dim_ext2 varchar(30) NULL COMMENT '扩展2', dim_ext3 varchar(30) NULL COMMENT '扩展3', dim_ext4 varchar(30) NULL COMMENT '扩展4', ct timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', PRIMARY KEY (`dim_type`,`dim_code`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
页面布局表
create table if not exists nshop.page_dim ( page_code varchar(30) NOT NULL COMMENT '页面编码', page_remark varchar(30) NULL COMMENT '页面描述', page_type varchar(5) NOT NULL COMMENT '页面类型(1:导航页 2:分类页 3:店铺页 4:产品页)', page_target varchar(30) NULL COMMENT '页面对应实体编号(如产品、店铺)', page_ctime bigint NOT NULL COMMENT '创建时间', PRIMARY KEY (`page_code`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
同步业务数据到ODS
数仓ods层和业务数据表的对应关系
中文含义 | MYSQL表名 | HIVE表名 |
---|---|---|
用户基本信息表 | nshop.customer | ods_nshop.ods_02_customer |
用户关注表 | nshop_customer_attention | ods_nshop.ods_02_customer_attention |
用户收货地址管理表 | nshop.customer_consignee | ods_nshop.ods_02_customer_consignee |
订单表 | nshop.orders | ods_nshop.ods_02_orders |
订单详情表 | nshop.order_detail | ods_nshop.ods_02_order_detail |
订单支付记录表 | nshop.orders_pay_records | ods_nshop.ods_02_orders_pay_records |
商品分类表 | nshop.category | ods_nshop.dim_pub_category |
商品信息表 | nshop.product | ods_nshop.dim_pub_product |
供应商信息表 | nshop.supplier | ods_nshop.dim_pub_supplier |
页面布局表 | nshop.page_dim | ods_nshop.dim_pub_page |
地域字典表 | nshop.area_dim | ods_nshop.dim_pub_area |
通用字典表 | nshop.comm_dim | ods_nshop.dim_pub_comm |
时间字典表 | nshop.date_dim | ods_nshop.dim_pub_date |
数仓ods层库表建设
创建数据库
create database if not exists ods_nshop; use ods_nshop;
用户基本信息表
create external table if not exists ods_nshop.ods_02_customer ( customer_id string COMMENT '用户ID', customer_login string COMMENT '用户登录名', customer_nickname string COMMENT '用户名(昵称)', customer_name string COMMENT '用户真实姓名', customer_pass string COMMENT '用户密码', customer_mobile string COMMENT '用户手机', customer_idcard string COMMENT '身份证', customer_gender TINYINT COMMENT '性别:1男 0女', customer_birthday string COMMENT '出生年月', customer_age TINYINT COMMENT '年龄', customer_age_range string COMMENT '年龄段', customer_email string COMMENT '用户邮箱', customer_natives string COMMENT '所在地区', customer_ctime bigint COMMENT '创建时间', customer_utime bigint COMMENT '修改时间', customer_device_num string COMMENT '用户手机设备号' ) location '/shujia/bigdata17/data/nshop/ods/ods_02_customer/'
用户关注表
create external table if not exists ods_nshop.ods_02_customer_attention ( customer_id string COMMENT '用户ID', attention_id string COMMENT '关注对象ID', attention_type TINYINT COMMENT '关注类型:3店铺 4商品', attention_status bigint COMMENT '关注状态:1关注 0取消', attention_ctime bigint COMMENT '生成时间' ) location '/shujia/bigdata17/data/nshop/ods/ods_02_customer_attention/'
收货地址管理表
create external table if not exists ods_nshop.ods_02_customer_consignee ( consignee_id string COMMENT '收货地址ID', customer_id string COMMENT '用户ID', consignee_name string COMMENT '收货人', consignee_mobile string COMMENT '收货人电话', consignee_zipcode string COMMENT '收货人地区', consignee_addr string COMMENT '收货人详细地址', consignee_tag string COMMENT '标签:1家 2公司 3学校', ctime bigint COMMENT '创建时间' ) location '/shujia/bigdata17/data/nshop/ods/ods_02_customer_consignee/'
商品分类表
create external table if not exists ods_nshop.dim_pub_category ( category_code string COMMENT '分类编码', category_name string COMMENT '分类名称', category_parent_id string COMMENT '父分类ID', category_status TINYINT COMMENT '分类状态:0禁止,1启用', category_utime bigint COMMENT '最后修改时间' ) location '/shujia/bigdata17/data/nshop/ods/dim_pub_category/'
供应商信息表
create external table if not exists ods_nshop.dim_pub_supplier( supplier_code string COMMENT '供应商编码', supplier_name string COMMENT '供应商名称', supplier_type TINYINT COMMENT '供应商类型:1.自营,2.官方 3其他', supplier_status TINYINT COMMENT '状态:0禁止,1启用', supplier_utime bigint COMMENT '最后修改时间' ) location '/shujia/bigdata17/data/nshop/ods/dim_pub_supplier/'
订单表
create external table if not exists ods_nshop.ods_02_orders ( order_id string COMMENT '订单ID(时间+商品ID+4位随机)', customer_id string COMMENT '下单用户ID', order_status TINYINT COMMENT '订单状态', customer_ip string COMMENT '下单用户IP', user_longitude string COMMENT ':经度', user_latitude string COMMENT '用户地理:纬度', user_areacode string COMMENT '用户所在地区', consignee_name string COMMENT '收货人', consignee_mobile string COMMENT '收货人电话', consignee_zipcode string COMMENT '收货人地址', pay_type string COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ', pay_code string COMMENT '支付对应唯一标识,如微信号、支付宝号', pay_nettype string COMMENT '支付网络方式:0 wifi | 1 4g | 2 3g |3 线下支付', district_money DECIMAL(8,1) COMMENT '优惠金额', shipping_money DECIMAL(8,1) COMMENT '运费金额', payment_money DECIMAL(10,1) COMMENT '支付金额', order_ctime bigint COMMENT '创建时间', shipping_time bigint COMMENT '发货时间', receive_time bigint COMMENT '收货时间' ) location '/shujia/bigdata17/data/nshop/ods/ods_02_orders/'
订单详情表
create external table if not exists ods_nshop.ods_02_order_detail( order_detail_id string COMMENT '订单详情表ID', order_id string COMMENT '订单表ID', product_id string COMMENT '订单商品ID', product_name string COMMENT '商品名称', product_remark string COMMENT '商品描述', product_cnt INT COMMENT '购买商品数量', product_price DECIMAL(5,1) COMMENT '购买商品单价', weighing_cost DECIMAL(2,1) COMMENT '商品加权价格', district_money DECIMAL(4,1) COMMENT '优惠金额', is_activity bigint COMMENT '1:参加活动|0:没有参加活动', order_detail_ctime bigint COMMENT '下单时间' ) location '/shujia/bigdata17/data/nshop/ods/ods_02_order_detail/'
页面布局
create external table if not exists ods_nshop.dim_pub_page ( page_code string COMMENT '页面编码', page_remark string COMMENT '页面描述', page_type string COMMENT '页面类型(1:导航页 2:分类页 3:店铺页 4:产品页)', page_target string COMMENT '页面对应实体编号(如产品、店铺)', page_ctime bigint COMMENT '创建时间' ) location '/shujia/bigdata17/data/nshop/ods/dim_pub_page/'
地域表
create external table if not exists ods_nshop.dim_pub_area ( region_code string COMMENT '地区编码 如110105 | 130406 ', region_code_desc string COMMENT '地区编码 如朝阳区 | 峰峰矿区', region_city string COMMENT '地区编码 如1101 北京市朝阳区 | 1304 邯郸', region_city_desc string COMMENT '地区编码 如1101 | 1304 邯郸市', region_province string COMMENT '地区编码 如11 北京市 | 13 河北省', region_province_desc string COMMENT '地区编码 如 北京市 | 河北' ) location '/shujia/bigdata17/data/nshop/ods/dim_pub_area/'
订单支付记录表
create external table if not exists ods_nshop.ods_02_orders_pay_records ( pay_id string COMMENT '支付记录ID', order_id string COMMENT '订单ID', customer_id string COMMENT '用户ID', pay_status string COMMENT '支付状态:0 支付失败| 1 支付成功', pay_type string COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ', pay_code string COMMENT '支付对应唯一标识,如微信号、支付宝号', pay_nettype string COMMENT '支付网络方式:1 wifi | 2 4g | 3 3g |4 线下支付', pay_amount double COMMENT '支付金额', pay_ctime bigint COMMENT '创建时间' ) location '/shujia/bigdata17/data/nshop/ods/ods_02_orders_pay_records/'
商品信息表
create external table if not exists ods_nshop.dim_pub_product( product_code string COMMENT '商品ID(分类编码+供应商编码+编号)', product_name string COMMENT '商品名称', product_remark string COMMENT '商品描述', category_code string COMMENT '分类ID', supplier_code string COMMENT '商品的供应商编码', product_price DECIMAL COMMENT '商品销售价格', product_weighing_cost DECIMAL(2,1) COMMENT '商品加权价格', product_publish_status TINYINT COMMENT '上下架状态:0下架 1上架', product_audit_status TINYINT COMMENT '审核状态:0未审核,1已审核', product_bar_code string COMMENT '国条码', product_weight FLOAT COMMENT '商品重量', product_length FLOAT COMMENT '商品长度', product_height FLOAT COMMENT '商品高度', product_width FLOAT COMMENT '商品宽度', product_colors SMALLINT COMMENT '0:白|1:赤|2:红|3:黄|4:绿|5:青|6:蓝|7:紫|8:黑|9:彩色', product_date string COMMENT '生产日期', product_shelf_life bigint COMMENT '商品有效期', product_ctime bigint COMMENT '商品录入时间', product_utime bigint COMMENT '最后修改时间' ) location '/shujia/bigdata17/data/nshop/ods/dim_pub_product/'
日期维度表
create external table if not exists ods_nshop.dim_pub_date ( date_day string COMMENT '日期:yyyyMMdd 如20190520', date_day_desc string COMMENT '日期格式:yyyy年MM月dd日 如2019年05月20日', date_day_month string COMMENT '日期:20 本月第几天 如2019年5月20日为5月第20天', date_day_year string COMMENT '日期:139 本年第几天 如2019年5月20日为139天', date_day_en string COMMENT '日期:monday 星期几', date_week string COMMENT '周:本月第几周 4 如 20190520为本月第4周', date_week_desc string COMMENT '周:本月第几周 如 20190504', date_month string COMMENT '月: 5 如 20190520为本年5月', date_month_en string COMMENT '月: may ', date_month_desc string COMMENT '月:如 201905', date_quarter string COMMENT '季度:2', date_quarter_en string COMMENT '季度:Q2', date_quarter_desc string COMMENT '季度:2019Q2 如 20190520为2019Q2', date_year string COMMENT '年:2019' ) location '/shujia/bigdata17/data/nshop/ods/dim_pub_date/'
Sqoop导入脚本
create external table if not exists ods_nshop.ods_02_customer ( customer_id string COMMENT '用户ID', customer_login string COMMENT '用户登录名', customer_nickname string COMMENT '用户名(昵称)', customer_name string COMMENT '用户真实姓名', customer_pass string COMMENT '用户密码', customer_mobile string COMMENT '用户手机', customer_idcard string COMMENT '身份证', customer_gender TINYINT COMMENT '性别:1男 0女', customer_birthday string COMMENT '出生年月', customer_age TINYINT COMMENT '年龄', customer_age_range string COMMENT '年龄段', customer_email string COMMENT '用户邮箱', customer_natives string COMMENT '所在地区', customer_ctime bigint COMMENT '创建时间', customer_utime bigint COMMENT '修改时间', customer_device_num string COMMENT '用户手机设备号' ) location '/shujia/bigdata17/data/nshop/ods/ods_02_customer/' --这两个同步通过命令行执行,其他的通过脚本执行 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --query "select customer_id,customer_login,customer_nickname,customer_name,customer_pass,customer_mobile,customer_idcard,customer_gender,customer_birthday,customer_age,customer_age_range,customer_email,customer_natives,customer_ctime,customer_utime,customer_device_num from customer where \$CONDITIONS" \ --username root \ --password 123456 \ --target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer/ \ --hive-import \ --hive-database ods_nshop \ --hive-table ods_02_customer \ --m 1 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --query "select customer_id,customer_login,customer_nickname,customer_name,customer_pass,customer_mobile,customer_idcard,customer_gender,customer_birthday,customer_age,customer_age_range,customer_email,customer_natives,customer_ctime,customer_utime,customer_device_num from customer where \$CONDITIONS" \ --username root \ --password 123456 \ --target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer \ --hive-import \ --hive-database ods_nshop \ --hive-table ods_02_customer \ --delete-target-dir \ -m 1
sqoop脚本文件如下:
#!/bin/bash sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --query "select customer_id,attention_id,attention_type,attention_status,attention_ctime from customer_attention where \$CONDITIONS" \ --username root \ --password 123456 \ --target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer_attention \ --hive-import \ --hive-database ods_nshop \ --hive-table ods_02_customer_attention \ --delete-target-dir \ -m 1 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --query "select consignee_id,customer_id,consignee_name,consignee_mobile,consignee_zipcode,consignee_addr,consignee_tag,ctime from customer_consignee where \$CONDITIONS" \ --username root \ --password 123456 \ --target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer_consignee \ --hive-import \ --hive-database ods_nshop \ --hive-table ods_02_customer_consignee \ --delete-target-dir \ -m 1 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --username root \ --password 123456 \ --query "select category_code,category_name,category_parent_id,category_status,category_utime from category where \$CONDITIONS" \ --target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_category \ --hive-import \ --hive-database ods_nshop \ --hive-table dim_pub_category \ --delete-target-dir \ -m 1 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --username root \ --password 123456 \ --query "select supplier_code,supplier_name,supplier_type,supplier_status,supplier_utime from supplier where \$CONDITIONS" \ --target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_supplier \ --hive-import \ --hive-database ods_nshop \ --hive-table dim_pub_supplier \ --delete-target-dir \ -m 1 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --username root \ --password 123456 \ --query "select order_id,customer_id,order_status,customer_ip,customer_longitude,customer_latitude,customer_areacode,consignee_name,consignee_mobile,consignee_zipcode,pay_type,pay_code,pay_nettype,district_money,shipping_money,payment_money,order_ctime,shipping_time,receive_time from orders where \$CONDITIONS" \ --target-dir /shujia/bigdata17/data/nshop/ods/ods_02_orders \ --hive-import \ --hive-database ods_nshop \ --hive-table ods_02_orders \ --delete-target-dir \ -m 1 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --username root \ --password 123456 \ --query "select order_detail_id,order_id,product_id,product_name,product_remark,product_cnt,product_price,weighing_cost,district_money,is_activity,order_detail_ctime from order_detail where \$CONDITIONS" \ --target-dir /shujia/bigdata17/data/nshop/ods/ods_02_order_detail \ --hive-import \ --hive-database ods_nshop \ --hive-table ods_02_order_detail \ --delete-target-dir \ -m 1 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --username root \ --password 123456 \ --query "select page_code,page_remark,page_type,page_target,page_ctime from page_dim where \$CONDITIONS" \ --target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_page \ --hive-import \ --hive-database ods_nshop \ --hive-table dim_pub_page \ --delete-target-dir \ -m 1 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --username root \ --password 123456 \ --query "select region_code,region_code_desc,region_city,region_city_desc,region_province,region_province_desc from area_dim where \$CONDITIONS" \ --target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_area \ --hive-import \ --hive-database ods_nshop \ --hive-table dim_pub_area \ --delete-target-dir \ -m 1 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --username root \ --password 123456 \ --query "select date_day,date_day_desc,date_day_month,date_day_year,date_day_en,date_week,date_week_desc,date_month,date_month_en,date_month_desc,date_quarter,date_quarter_en,date_quarter_desc,date_year from date_dim where \$CONDITIONS" \ --target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_date \ --hive-import \ --hive-database ods_nshop \ --hive-table dim_pub_date \ --delete-target-dir \ -m 1 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --username root \ --password 123456 \ --query "select pay_id,order_id,customer_id,pay_status,pay_type,pay_code,pay_nettype,pay_amount,pay_ctime from orders_pay_records where \$CONDITIONS" \ --target-dir /shujia/bigdata17/data/nshop/ods/ods_02_orders_pay_records \ --hive-import \ --hive-database ods_nshop \ --hive-table ods_02_orders_pay_records \ --delete-target-dir \ -m 1 sqoop import \ --connect jdbc:mysql://master:3306/nshop \ --username root \ --password 123456 \ --query "select product_code,product_name,product_remark,category_code,supplier_code,product_price,product_weighing_cost,product_publish_status,product_audit_status,product_bar_code,product_weight,product_length,product_height,product_width,product_colors,product_date,product_shelf_life,product_ctime,product_utime from product where \$CONDITIONS" \ --target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_product \ --hive-import \ --hive-database ods_nshop \ --hive-table dim_pub_product \ --delete-target-dir \ -m 1
定时任务:
#设定定时任务 #crond定时方式的配置 vim /etc/crontab #在最后一行加上定时任务的配置 每小时执行一次 * */1 * * * root /usr/local/shell/test.sh /usr/local/soft/atao_file/xiangmu_1/sqoopshell.sh #修改crontab的配置后需要重启crontab,使配置生效 /bin/systemctl restart crond.service
埋点日志
日志数据我们直接load就可以了,下面的外部数据再使用flume采集,为了演示不同的导数据方式,这里就直接load了
电商平台在网页版或APP版终端上进行业务埋点,采集相关用户行为数据发送给服务器进行用户行为分析。
日志文件如下(截取部分)
{"action":"05","event_type":"01","customer_id":"20101000324999676","device_num":"586344","device_type":"9","os":"2","os_version":"2.2","manufacturer":"05","carrier":"2","network_type":"2","area_code":"41092","longitude":"116.35636","latitude":"40.06919","extinfo":"{\"target_type\":\"4\",\"target_keys\":\"20402\",\"target_order\":\"31\",\"target_ids\":\"[\\\"4320402595801\\\",\\\"4320402133801\\\",\\\"4320402919201\\\",\\\"4320402238501\\\"]\"}","ct":1567896035000} {"action":"05","event_type":"02","customer_id":"20101000405999595","device_num":"956236","device_type":"9","os":"2","os_version":"4.3","manufacturer":"08","carrier":"2","network_type":"3","area_code":"10015","longitude":"116.35985","latitude":"40.069590000000005","extinfo":"{\"target_type\":\"3\",\"target_id\":\"4320406544401\",\"target_action\":\"01\"}","ct":1567896035000} {"action":"05","event_type":"02","customer_id":"20101000648999352","device_num":"876263","device_type":"2","os":"2","os_version":"6.2","manufacturer":"07","carrier":"2","network_type":"2","area_code":"42058","longitude":"116.34721","latitude":"40.06685","extinfo":"{\"target_type\":\"3\",\"target_id\":\"4320701402201\",\"target_action\":\"01\"}","ct":1567896035000} {"action":"07","event_type":"02","customer_id":"20101000729999271","device_num":"451481","device_type":"9","os":"2","os_version":"4.5","manufacturer":"09","carrier":"3","network_type":"2","area_code":"37162","longitude":"116.35737","latitude":"40.06962","extinfo":"{\"target_id\":\"4320207200701\"}","ct":1567896035000} {"action":"06","event_type":"","customer_id":"20101000810999190","device_num":"114649","device_type":"9","os":"2","os_version":"8.2","manufacturer":"02","carrier":"3","network_type":"0","area_code":"35062","longitude":"116.38244999999999","latitude":"40.06447","extinfo":"{}","ct":1567896035000} {"action":"07","event_type":"01","customer_id":"20101000972999028","device_num":"332835","device_type":"2","os":"2","os_version":"7.5","manufacturer":"02","carrier":"2","network_type":"0","area_code":"15082","longitude":"116.33457","latitude":"40.06449","extinfo":"{\"target_id\":\"4320701224301\"}","ct":1567896035000} {"action":"05","event_type":"02","customer_id":"20101000972999028","device_num":"332835","device_type":"2","os":"2","os_version":"7.5","manufacturer":"02","carrier":"2","network_type":"0","area_code":"15082","longitude":"116.33457","latitude":"40.06449","extinfo":"{\"target_type\":\"3\",\"target_id\":\"4320701224301\",\"target_action\":\"01\"}","ct":1567896035000} {"action":"08","event_type":"02","customer_id":"20101001377998623","device_num":"269526","device_type":"9","os":"2","os_version":"2.2","manufacturer":"05","carrier":"2","network_type":"3","area_code":"13053","longitude":"116.36325","latitude":"40.06456","extinfo":"{\"target_id\":\"4320901702901\"}","ct":1567896035000} {"action":"08","event_type":"01","customer_id":"20101001458998542","device_num":"252612","device_type":"9","os":"2","os_version":"7.5","manufacturer":"03","carrier":"2","network_type":"2","area_code":"10013","longitude":"116.39949","latitude":"40.069250000000004","extinfo":"{\"target_id\":\"4320901657101\"}","ct":1567896035000}
中文含义 | HIVE表名 |
---|---|
用户行为日志表 | ods_nshop.ods_nshop_01_useractlog |
用户行为日志表
创建hadoop目录
hadoop fs -mkdir -p '/shujia/bigdata17/data/nshop/ods/user_action_log/'
create external table if not exists ods_nshop.ods_nshop_01_useractlog( action string comment '行为类型:install安装|launch启动|interactive交互|page_enter_h5页面曝光|page_enter_native页面进入|exit退出', event_type string comment '行为类型:click点击|view浏览|slide滑动|input输入', customer_id string comment '用户id', device_num string comment '设备号', device_type string comment '设备类型', os string comment '手机系统', os_version string comment '手机系统版本', manufacturer string comment '手机制造商', carrier string comment '电信运营商', network_type string comment '网络类型', area_code string comment '地区编码', longitude string comment '经度', latitude string comment '纬度', extinfo string comment '扩展信息(json格式)', duration string comment '停留时长', ct bigint comment '创建时间' ) partitioned by (bdp_day string) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' STORED AS TEXTFILE location '/shujia/bigdata17/data/nshop/ods/user_action_log/'
注意
json格式数据表需要通过serde机制处理 1 在hive-site.xml中设置三方jar包 <property> <name>hive.aux.jars.path</name> <value>/usr/local/hive-3.1.2/lib/</value> </property> 2 在hive.aux.jars.path设置的路径中增加hive-hcatalog-core-3.1.2.jar [root@master hive-3.1.2]# cp $HIVE_HOME/hcatalog/share/hcatalog/hive-hcatalog-core-3.1.2.jar /usr/local/hive-3.1.2/lib/
同步数据到表
将0000文件上传服务器,然后使用hive命令映射到hive表
hive> load data local inpath "/usr/local/soft/atao_file/xiangmu_file/000000_0" into table ods_nshop.ods_nshop_01_useractlog partition(bdp_day="20220630");
外部数据
外部数据来源主要有: 1 企业间的接口调用(如其他企业平台上的广告投放营销) 2 公共数据的爬虫数据(公共数据、友商数据、社区|社交平台公开数据)
中文含义 | HIVE表名 |
---|---|
广告投放信息表 | ods_nshop.ods_nshop_01_releasedatas |
广告投放信息
create external table if not exists ods_nshop.ods_nshop_01_releasedatas( device_num string comment '设备号', device_type string comment '设备类型', os string comment '手机系统', os_version string comment '手机系统版本', manufacturer string comment '手机制造商', area_code string comment '地区编码', release_sid string comment '投放请求id', release_session string comment '投放会话id', release_sources string comment '投放渠道', release_params string comment '投放请求参数', ct string comment '创建时间' ) partitioned by (bdp_day string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' stored as textfile location '/shujia/bigdata17/data/nshop/ods/release/'
采集数据
外部数据需要使用Flume进行数据采集操作,将采集到的数据映射到Hive中,首先创建Hive的表
外部数据如下(截取)
723748,2,2,6.1,3,540529,1572654785000fvkpnp,1572650000000000000000000,tencent,ip=3.61.2.222&deviceNum=723748&lon=null&lat=null&aid=null&ctime=1572654905000&sources=tencent&session=1572654785000723748416198&productPage=4320308033801,1573260000000 928223,2,2,2.8,7,441881,1572654785000mvfnnn,1572650000000000000000000,liebao,ip=95.219.76.152&deviceNum=928223&lon=null&lat=null&aid=null&ctime=1572654919000&sources=liebao&session=1572654785000928223624678&productPage=4320102710201,1573260000000 928223,2,2,7.1,3,653123,1572654785000mvfnnn,1572650000000000000000000,liebao,ip=95.219.76.152&deviceNum=928223&lon=null&lat=null&aid=null&ctime=1572654919000&sources=liebao&session=1572654785000928223624678&productPage=4320102710201,1573260000000 784295,9,2,9.7,5,632726,1572654785000qvisko,1572650000000000000000000,baidu,ip=210.121.223.225&deviceNum=784295&lon=null&lat=null&aid=null&ctime=1572655033000&sources=baidu&session=1572654785000784295423824&productPage=4320206519001,1573260000000 496626,9,2,4.8,5,540522,1572654785000jdesth,1572650000000000000000000,douyin,ip=231.61.62.159&deviceNum=496626&lon=null&lat=null&aid=null&ctime=1572654947000&sources=douyin&session=1572654785000496626329353&productPage=4320303786701,1573260000000 235966,2,2,2.7,2,610602,1572654785000updxen,1572650000000000000000000,ximalaya,ip=246.85.131.184&deviceNum=235966&lon=null&lat=null&aid=null&ctime=1572654853000&sources=ximalaya&session=1572654785000235966421378&productPage=4320505806501,1573260000000 275157,9,2,2.4,5,632724,1572654785000pqwgch,1572650000000000000000000,liebao,ip=106.227.203.6&deviceNum=275157&lon=null&lat=null&aid=null&ctime=1572655014000&sources=liebao&session=1572654785000275157768731&productPage=4320603904901,1573260000000 322548,2,2,5.2,2,632822,1572654785000wtjppb,1572650000000000000000000,liebao,ip=105.247.218.235&deviceNum=322548&lon=null&lat=null&aid=null&ctime=1572654981000&sources=liebao&session=1572654785000322548126172&productPage=4320403173201,1573260000000 432626,9,2,1.4,5,540227,1572654785000dxifed,1572650000000000000000000,netyex,ip=216.169.184.123&deviceNum=432626&lon=null&lat=null&aid=null&ctime=1572654916000&sources=netyex&session=1572654785000432626892945&productPage=4320808279201,1573260000000 693898,9,2,2.2,9,620924,1572654785000tnjomq,1572650000000000000000000,ximalaya,ip=130.238.115.6&deviceNum=693898&lon=null&lat=null&aid=null&ctime=1572654861000&sources=ximalaya&session=1572654785000693898367422&productPage=4320503604301,1573260000000 164237,9,2,9.2,3,360423,1572654785000bpxghm,1572650000000000000000000,douyin,ip=119.26.191.164&deviceNum=164237&lon=null&lat=null&aid=null&ctime=1572655050000&sources=douyin&session=1572654785000164237721644&productPage=4320804560801,1573260000000
配置Flume采集文件
监控文件如下
# 命名 a1.sources = r1 a1.channels = c1 a1.sinks = s1 # 关联 a1.sources.r1.channels = c1 a1.sinks.s1.channel = c1 # 配置source类型和属性 a1.sources.r1.type = TAILDIR a1.sources.r1.filegroups = g1 a1.sources.r1.filegroups.g1 = /usr/local/soft/flume-1.9.0/data/.*.csv # 元数据保存位置 a1.sources.r1.positionFile = /usr/local/soft/flume-1.9.0/flume-log/taildir_position.json # 配置channel类型属性 a1.channels.c1.type = memory # 缓存池大小 a1.channels.c1.capacity = 1000 # 每个事务sink拉取的大小 a1.channels.c1.transactionCapacity = 100 # 配置Sink类型和属性 a1.sinks.s1.type = hdfs a1.sinks.s1.hdfs.path = hdfs://master:9000/data/nshop/ods/release/%Y%m%d a1.sinks.s1.hdfs.fileSuffix = .log # 下面三个配置参数如果都设置为0,那么表示不执行次参数(失效) a1.sinks.s1.hdfs.rollInterval = 10 a1.sinks.s1.hdfs.rollSize = 0 a1.sinks.s1.hdfs.rollCount = 0 # 设置采集文件格式 如果你是存文本文件,就是用DataStream a1.sinks.s1.hdfs.fileType = DataStream a1.sinks.s1.hdfs.writeFormat = Text # 开启本地时间戳获取参数,因为我们的目录上面已经使用转义符号,所以要使用时间戳 a1.sinks.s1.hdfs.useLocalTimeStamp = true
创建文件
[root@master conf]# vim agentData.conf
启动执行
[root@master conf]# flume-ng agent -n a1 -c conf -f agentData.conf -Dflume.root.logger=INFO,console
编写一个sh脚本,然后生产数据到对应位置的文件夹下面
#!/bin/base cat 2.txt >> /usr/local/soft/flume-1.9.0/data/2.txt
执行映射
hive> load data inpath "/data/nshop/ods/release22/20220630/*" into table ods_nshop.ods_nshop_01_releasedatas partition(bdp_day='20220630');
这篇关于电商项目_ods层建设的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2024-05-15鸿蒙生态设备数量超8亿台
- 2024-05-13TiDB + ES:转转业财系统亿级数据存储优化实践
- 2024-05-09“2024鸿蒙零基础快速实战-仿抖音App开发(ArkTS版)”实战课程已上线
- 2024-05-09聊聊如何通过arthas-tunnel-server来远程管理所有需要arthas监控的应用
- 2024-05-09log4j2这么配就对了
- 2024-05-09nginx修改Content-Type
- 2024-05-09Redis多数据源,看这篇就够了
- 2024-05-09Google Chrome驱动程序 124.0.6367.62(正式版本)去哪下载?
- 2024-05-09有没有大佬知道这种数据应该怎么抓取呀?
- 2024-05-09这种运行结果里的10.100000001,怎么能最快改成10.1?