admin 管理员组文章数量: 887006
hive的hql练习题
1、找出全部夺得3连贯的队伍
数据:
活塞,1990
公牛,1991
公牛,1992
公牛,1993
火箭,1994
火箭,1995
公牛,1996
公牛,1997
公牛,1998
马刺,1999
湖人,2000
湖人,2001
湖人,2002
马刺,2003
活塞,2004
马刺,2005
热火,2006
马刺,2007
凯尔特人,2008
湖人,2009
湖人,2010
-- 创建表
create table hwt1(
team string,
year int
)
row format delimited
fields terminated by ',';
-- 加载数据
load data local inpath "/home/hw2/t1.dat" into table hwt1;
-- 检查数据
select * from hwt1;
答题:
-- 1.将数据分组排序
select team,year,row_number() over(partition by team order by year) rowNum
from hwt1;
-- 2.添加过度字段
select team,year-row_number() over(partition by team order by year) gid
from hwt1;
-- 3.结果sql ——————————
select team,gid+1 starYear
from (select team,year-row_number() over(partition by team order by year) gid from hwt1
) t1 group by team,gid having count(*)>=3;
结果:
2、找出每个id在在一天之内所有的波峰与波谷值
数据:
sh66688,9:35,29.48
sh66688,9:40,28.72
sh66688,9:45,27.74
sh66688,9:50,26.75
sh66688,9:55,27.13
sh66688,10:00,26.30
sh66688,10:05,27.09
sh66688,10:10,26.46
sh66688,10:15,26.11
sh66688,10:20,26.88
sh66688,10:25,27.49
sh66688,10:30,26.70
sh66688,10:35,27.57
sh66688,10:40,28.26
sh66688,10:45,28.03
sh66688,10:50,27.36
sh66688,10:55,26.48
sh66688,11:00,27.41
sh66688,11:05,26.70
sh66688,11:10,27.35
sh66688,11:15,27.35
sh66688,11:20,26.63
sh66688,11:25,26.35
sh66688,11:30,26.81
sh66688,13:00,29.45
sh66688,13:05,29.41
sh66688,13:10,29.10
sh66688,13:15,28.24
sh66688,13:20,28.20
sh66688,13:25,28.59
sh66688,13:30,29.49
sh66688,13:35,30.45
sh66688,13:40,30.31
sh66688,13:45,30.17
sh66688,13:50,30.55
sh66688,13:55,30.75
sh66688,14:00,30.03
sh66688,14:05,29.61
sh66688,14:10,29.96
sh66688,14:15,30.79
sh66688,14:20,29.82
sh66688,14:25,30.09
sh66688,14:30,29.61
sh66688,14:35,29.88
sh66688,14:40,30.36
sh66688,14:45,30.88
sh66688,14:50,30.73
sh66688,14:55,30.76
sh88888,9:35,67.23
sh88888,9:40,66.56
sh88888,9:45,66.73
sh88888,9:50,67.43
sh88888,9:55,67.49
sh88888,10:00,68.34
sh88888,10:05,68.13
sh88888,10:10,67.35
sh88888,10:15,68.13
sh88888,10:20,69.05
sh88888,10:25,69.82
sh88888,10:30,70.62
sh88888,10:35,70.59
sh88888,10:40,70.40
sh88888,10:45,70.29
sh88888,10:50,70.53
sh88888,10:55,70.92
sh88888,11:00,71.13
sh88888,11:05,70.24
sh88888,11:10,70.37
sh88888,11:15,69.79
sh88888,11:20,69.73
sh88888,11:25,70.52
sh88888,11:30,71.23
sh88888,13:00,72.85
sh88888,13:05,73.76
sh88888,13:10,74.72
sh88888,13:15,75.48
sh88888,13:20,75.80
sh88888,13:25,76.74
sh88888,13:30,77.22
sh88888,13:35,77.12
sh88888,13:40,76.90
sh88888,13:45,77.80
sh88888,13:50,78.75
sh88888,13:55,78.30
sh88888,14:00,78.68
sh88888,14:05,78.99
sh88888,14:10,78.35
sh88888,14:15,78.37
sh88888,14:20,78.07
sh88888,14:25,78.80
sh88888,14:30,79.78
sh88888,14:35,79.72
sh88888,14:40,80.71
sh88888,14:45,79.92
sh88888,14:50,80.49
sh88888,14:55,80.44
准备:
-- 创建表
create table hwt2(
id string,
time string,
price double
)
row format delimited
fields terminated by ',';
-- 导入数据
load data local inpath "/home/hw2/t2.dat" into table hwt2;
-- 检查
select * from hwt2;
答题:
-- 处理time格式
select id,if(length(time)=4,'0'||time,time)time,price from hwt2;
-- 获取错位减值,与下一行进行比较,如果大于尾正,小于为负
-- 并进行排序
select id,if(length(time)=4,'0'||time,time) time,price,if(price-nvl(lead(price) over(partition by id order by if(length(time)=4,'0'||time,time)),0)>=0,1,-1) feature,row_number() over(partition by id order by if(length(time)=4,'0'||time,time)) rowNum
from hwt2;
-- 由id,特征进行分组根据时间进行排序,rowNum-值
-- 获得分组情况,feature为1第一条数据为峰值,为-1的一条数据为谷值
select t1.id,t1.time,t1.price,t1.feature,t1.rowNum-row_number() over(partition by t1.id,t1.feature order by t1.time) gid
from(select id,if(length(time)=4,'0'||time,time) time,price,if(price-nvl(lead(price) over(partition by id order by if(length(time)=4,'0'||time,time)),0)>=0,1,-1) feature,row_number() over(partition by id order by if(length(time)=4,'0'||time,time)) rowNumfrom hwt2
)t1;
-- 获取每组id,difval,gid第一条数据
select t3.id,t3.time,t3.price,if(t3.feature=1,'波峰','波谷')feature
from(select t2.id,t2.time,t2.price,t2.feature,row_number() over(partition by t2.id,t2.feature,t2.gid order by t2.time) rnfrom(select t1.id,t1.time,t1.price,t1.feature,t1.rowNum-row_number() over(partition by t1.id,t1.feature order by t1.time) gidfrom(select id,if(length(time)=4,'0'||time,time) time,price,if(price-nvl(lead(price) over(partition by id order by if(length(time)=4,'0'||time,time)),0)>=0,1,-1) feature,row_number() over(partition by id order by if(length(time)=4,'0'||time,time)) rowNumfrom hwt2)t1)t2
)t3
where t3.rn=1;
-- 整理:根据id分组 time进行排序
不会了!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
结果:
3、写SQL
数据:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:02 .html?show=IEEE1FIJ3106A1H062HA
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:23 .html?show=EC1JGEC8G3HJC82JIHCD
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:09 .html?show=G75J62JE63JE3678G98F
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:12 .html?show=1957CGIA1702C1J9F0GH
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:31 .html?show=BJ9CJJ6F0GH0CDGGHCCB
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:34 .html?show=G5472AH6G1I61CGF9HGC
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:46 .html?show=E771D8I4JJ0DE4DF575C
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:13 .html?show=3EG4D1108IC3B446G2EB
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:14 .html?show=99B480535EC2FA31DJ92
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:11 .html?show=JGH3HJ36D7GHIEHEEFI6
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:20 .html?show=6A6799246J9J4B6IC9HI
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:25 .html?show=697DI68E5F133A1DD96D
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:41 .html?show=1C72FF96F549G4A458BI
32258fe7130844399859aec54b6df5ff 2020/05/28 03:47 .html?show=A9IB685E7CJ9DIAB2244
32258fe7130844399859aec54b6df5ff 2020/05/28 03:33 .html?show=A70H86DF1EHG2E57H1HE
32258fe7130844399859aec54b6df5ff 2020/05/28 03:21 .html?show=366DJ2870404637EC19D
32258fe7130844399859aec54b6df5ff 2020/05/28 03:54 .html?show=DGCC1FH06B69I9B1GA08
32258fe7130844399859aec54b6df5ff 2020/05/28 03:48 .html?show=JAF2067192A1H53IJ00G
32258fe7130844399859aec54b6df5ff 2020/05/28 03:08 .html?show=I1J8G8075B7G5IDA326C
32258fe7130844399859aec54b6df5ff 2020/05/28 05:09 .html?show=J7BGJ4B50GFHG4FEJCB6
32258fe7130844399859aec54b6df5ff 2020/05/28 05:26 .html?show=6J9JJ89EADI7DI0H82C3
32258fe7130844399859aec54b6df5ff 2020/05/28 05:11 .html?show=6J224ECEABC7C9I62763
32258fe7130844399859aec54b6df5ff 2020/05/28 05:34 .html?show=GH06BC9D6I2G7H3D79B8
32258fe7130844399859aec54b6df5ff 2020/05/28 05:18 .html?show=H426J08J6H4JJB74HFJE
32258fe7130844399859aec54b6df5ff 2020/05/28 05:50 .html?show=0C78E264AHEADEJ26643
32258fe7130844399859aec54b6df5ff 2020/05/28 05:45 .html?show=A4702EJ6E5DJIA475AF1
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:04 .html?show=3I84DE05EH1AB6D13B3G
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:21 .html?show=CC6C7J0G326G2BJ3D179
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:48 .html?show=7HJ4BIAGHD73F49G9JJC
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:56 .html?show=H1110A1AA14H64DA876C
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:50 .html?show=664G909C9EG7JC63IB7D
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:53 .html?show=FGEC1A7I60JJDAJGAF1A
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:00 .html?show=240EB7E488G6FH0G27JF
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:48 .html?show=03FI36B82792CEBJHI29
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:14 .html?show=JHF32AB5EH58HEC2F63G
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:50 .html?show=HDCJCA8JE1BF1IFE6HF6
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:58 .html?show=I8D80BJFC3F3FEGGHA5C
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:13 .html?show=2GHE5B24F5ABC13I6EB4
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:39 .html?show=F6G8632470DAE5E760BG
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:15 .html?show=1E7F19G856JA9JD8AB9D
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:43 .html?show=5E96FFJA82E1I2BF2FEE
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:14 .html?show=89H155HCJ41H228010I5
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:36 .html?show=B38GA2D1E10EBFE8F6F7
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:24 .html?show=FC6FD5F45B12ABIF02GD
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:06 .html?show=HG4HE7CGI00A7A1F2J5F
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:51 .html?show=JB27071067EGBE8D060C
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:33 .html?show=D42J0IC234DIA481EF82
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:09 .html?show=H96I861CGIGIF571H2JJ
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:24 .html?show=H93JAJFDJH19HEF1E918
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:47 .html?show=FG0BGA0CFDF6270IJE32
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:17 .html?show=80JBIA9GFAJ76FD980AE
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:46 .html?show=1G50E0G0804JAJH2HBA1
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:00 .html?show=21DGJA045F8E64JHA0D6
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:08 .html?show=44EDBC5B43A444FH001C
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:10 .html?show=A1I8GI28GF0B14E97D64
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:51 .html?show=FIHCCA16AJDA32EC4332
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:56 .html?show=78GH99D70424B013G303
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:20 .html?show=F31BH181E6E8JJ4AD295
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:16 .html?show=47G414184H33E14DH159
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:10 .html?show=JJ189D6F4HD6F0E7A2AG
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:51 .html?show=7DA832A31BI430197F48
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:32 .html?show=ACDFI9730A2B646I0270
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:11 .html?show=999AGGBH0DC2E35J097B
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:32 .html?show=58IF72BB8F74ID23GE87
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:37 .html?show=460DIAJ135CC950J3C77
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:35 .html?show=81II8DB2J2IF8AIFJ67F
80ea80b2e5a64cbebfaf34aa797125f0 2020/05/28 03:22 .html?show=H02AJA95GBE98768ADHF
80ea80b2e5a64cbebfaf34aa797125f0 2020/05/28 03:07 .html?show=A4EDIFDEJB2J40I64F04
80ea80b2e5a64cbebfaf34aa797125f0 2020/05/28 03:56 .html?show=5I687EDH2C3A1JJAF57D
80ea80b2e5a64cbebfaf34aa797125f0 2020/05/28 03:58 .html?show=929I8CG2CDB9AE0268JI
80ea80b2e5a64cbebfaf34aa797125f0 2020/05/28 03:27 .html?show=D8C799HJ092G9I4230EH
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:17 .html?show=876583DAG4FIEI637F6E
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:46 .html?show=936BIDF2F4352A39H6FF
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:17 .html?show=6J2A9DDHB4787CD90134
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:35 .html?show=F87D3565JD253ED6FIHE
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:56 .html?show=DB23C4598E005CGHH06D
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:53 .html?show=B9B3GFIB5EIC9E32J5IJ
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:16 .html?show=20BBBA585JF22I953GBG
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:23 .html?show=IAG945B35D5DA6F1E992
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:52 .html?show=47BH0G2A7IGFHIH61A85
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:45 .html?show=IDE79JG4DI0J6508F0HH
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:32 .html?show=5JA75D16G22BE0H881G2
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:47 .html?show=F58GI58H74989HD65173
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:46 .html?show=G24C84DCG9FD5GFBFCEE
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:54 .html?show=90BIIIIA346E50A5DA67
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:17 .html?show=9F3AB45F1C3HAH58B8B8
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:16 .html?show=1J85244F2F81JCBGHH9C
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:56 .html?show=73E0E5J74EA8A5B8C0FD
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:14 .html?show=DCIJI9H51I5BHGC1587E
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:58 .html?show=FI5EICD1F25F005J3CJG
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:13 .html?show=87F3GAA5DH97H6G6I3J2
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:51 .html?show=8775525F5EG213C94EE7
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:33 .html?show=3HHEBFA7BA8J8BH3GGIG
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:37 .html?show=BD7FE2HE8AED2F5J6818
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:56 .html?show=DE8IH40GG47E096E0BE5
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:38 .html?show=24AEBB54FA71D9F7JIDA
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:05 .html?show=5JI210IFIJ0I707G4II7
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:39 .html?show=9BCGBGHBE73IG5AF4569
准备
-- 创建过度表
create table hwt3(
id string,
dt string,
browseid string
)row format delimited fields terminated by '\t';
-- 导入数据
load data local inpath "/home/hw2/t3.dat" into table hwt3;
-- 检查数据
select * from hwt3;
-- !!(下面作废)!!
-- 简化数据,建表
create table if not exists hwt3
as select id,split(dt,' ')[1]dt,split(split(browseid,'/')[4],'\\?')[0] browseid from hwt3mid;
-- 检查数据
select * from hwt3;
-- 删除过度表
drop table hwt3mid;
3.1、每个id浏览时长、步长
答题:
-- 获取分组时长duration
select id,browseid,(unix_timestamp(concat_ws('-',split(last_value(dt) over (partition by id order by dt rows between unbounded preceding and unbounded following),'/'))|| ':00') -unix_timestamp(concat_ws('-',split(first_value(dt) over (partition by id order by dt rows between unbounded preceding and unbounded following),'/'))|| ':00'))/60 duration
from hwt3;
-- 获取分组步长
select id,duration,count(*) stepSize
from(select id,browseid,(unix_timestamp(concat_ws('-',split(last_value(dt) over (partition by id order by dt rows between unbounded preceding and unbounded following),'/'))|| ':00') -unix_timestamp(concat_ws('-',split(first_value(dt) over (partition by id order by dt rows between unbounded preceding and unbounded following),'/'))|| ':00'))/60 durationfrom hwt3
)t1 group by id,duration;
结果:
3.2、如果两次浏览之间的间隔超过30分钟,认为是两个不同的浏览时间;再求每个id浏 览时长、步长
答题
-- 将超过30分钟求出
select id,browseid,concat_ws('-',split(dt,'/'))|| ':00' dt,if((nvl(unix_timestamp(concat_ws('-',split(dt,'/'))|| ':00')-unix_timestamp(concat_ws('-',split(lag(dt) over(partition by id order by dt),'/'))|| ':00'),0)/60)>30,1,0) gid
from hwt3;
-- 添加gid
select id,browseid,dt,sum(flag) over(partition by id order by dt rows between unbounded preceding and current row) gid
from(select id,browseid,concat_ws('-',split(dt,'/'))|| ':00' dt,if((nvl(unix_timestamp(concat_ws('-',split(dt,'/'))|| ':00')-unix_timestamp(concat_ws('-',split(lag(dt) over(partition by id order by dt),'/'))|| ':00'),0)/60)>30,1,0) flagfrom hwt3
)t3;
-- 计算时长
select id,dt,browseid,gid,(unix_timestamp(last_value(dt) over (partition by id,gid order by dt rows between unbounded preceding and unbounded following)) -unix_timestamp(first_value(dt) over (partition by id,gid order by dt rows between unbounded preceding and unbounded following)))/60 duration
from(select id,browseid,dt,sum(flag) over(partition by id order by dt rows between unbounded preceding and current row) gidfrom(select id,browseid,concat_ws('-',split(dt,'/'))|| ':00' dt,if((nvl(unix_timestamp(concat_ws('-',split(dt,'/'))|| ':00')-unix_timestamp(concat_ws('-',split(lag(dt) over(partition by id order by dt),'/'))|| ':00'),0)/60)>30,1,0) flagfrom hwt3)t3
)t2;
-- 结果:计算步长
select id,duration,count(*) stepSize
from(select id,browseid,(unix_timestamp(last_value(dt) over (partition by id,gid order by dt rows between unbounded preceding and unbounded following)) -unix_timestamp(first_value(dt) over (partition by id,gid order by dt rows between unbounded preceding and unbounded following)))/60 durationfrom(select id,browseid,dt,sum(flag) over(partition by id order by dt rows between unbounded preceding and current row) gidfrom(select id,browseid,concat_ws('-',split(dt,'/'))|| ':00' dt,if((nvl(unix_timestamp(concat_ws('-',split(dt,'/'))|| ':00')-unix_timestamp(concat_ws('-',split(lag(dt) over(partition by id order by dt),'/'))|| ':00'),0)/60)>30,1,0) flagfrom hwt3)t3)t2
)t1 group by id,duration;
结果:
本文标签: hive的hql练习题
版权声明:本文标题:hive的hql练习题 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.freenas.com.cn/jishu/1732353883h1533880.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论