DataFrame中的行动算子操作1
2022/8/30 23:23:02
本文主要是介绍DataFrame中的行动算子操作1,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
// Shared setup for the examples below: a local SparkSession and a 40-row DataFrame.
// Requires org.apache.spark.SparkConf and org.apache.spark.sql.{DataFrame, SparkSession}.
val conf = new SparkConf().setAppName("action").setMaster("local[*]")
val session = SparkSession.builder().config(conf).getOrCreate()

// Built directly as a Seq. The previous version assigned an Array to a Seq-typed val,
// which only compiles through an implicit Array-to-Seq conversion (deprecated in Scala 2.13).
val seq: Seq[(String, Int)] = {
  // 23 characters long, i.e. longer than the 20-character limit that show() truncates at.
  val name = "zs123456789123456789123"
  // Same 40 ages, in the same order, as the original hand-written tuple list.
  val ages: Seq[Int] =
    Seq(20, 21, 22, 23, 24, 20) ++
      Seq(20, 21, 22, 23, 24, 20) ++
      Seq.fill(14)(20) ++ Seq(29, 30) ++
      Seq.fill(10)(20) ++ Seq(29, 30)
  ages.map(age => (name, age))
}

import session.implicits._ // provides toDF on local collections
val frame: DataFrame = seq.toDF("namea", "ageb")
1. printSchema
/** Prints the schema of `frame`, framed by a start banner and an end banner.
  *
  * @param frame the DataFrame whose schema is printed
  */
def printSchemaOpt(frame: DataFrame): Unit = {
  // Both banners share the same text except for the phase word.
  def banner(phase: String): Unit =
    println(s"-----------printschema操作${phase}-----------")

  banner("开始")
  frame.printSchema()
  banner("结束")
}

/* Output:
-----------printschema操作开始-----------
root
 |-- namea: string (nullable = true)
 |-- ageb: integer (nullable = false)
-----------printschema操作结束-----------
*/
2. show
// show overloads. "Truncated" means cell text longer than 20 characters is cut; truncation is on by default.
//   show()         - first 20 rows (not all rows), truncated
//   show(n)        - first n rows, truncated
//   show(true)     - first 20 rows, truncated
//   show(false)    - first 20 rows, without the 20-character limit
//   show(n, true)  - first n rows, truncated

/** Runs `show()`, `show(3)` and `show(30, truncate = true)`, each between start/end banners.
  *
  * @param frame the DataFrame to display
  */
def showOpt(frame: DataFrame): Unit = {
  println("-----------show1操作开始-----------")
  frame.show()
  println("-----------show1操作结束-----------")

  println("-----------show2操作开始-----------")
  frame.show(3)
  println("-----------show2操作结束-----------")

  println("-----------show3操作开始-----------")
  frame.show(30, truncate = true)
  println("-----------show3操作结束-----------")
}

/* Output:
-----------show1操作开始-----------
+--------------------+----+
|               namea|ageb|
+--------------------+----+
|zs123456789123456...|  20|
|zs123456789123456...|  21|
|zs123456789123456...|  22|
|zs123456789123456...|  23|
|zs123456789123456...|  24|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  21|
|zs123456789123456...|  22|
|zs123456789123456...|  23|
|zs123456789123456...|  24|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
+--------------------+----+
only showing top 20 rows
-----------show1操作结束-----------
-----------show2操作开始-----------
+--------------------+----+
|               namea|ageb|
+--------------------+----+
|zs123456789123456...|  20|
|zs123456789123456...|  21|
|zs123456789123456...|  22|
+--------------------+----+
only showing top 3 rows
-----------show2操作结束-----------
-----------show3操作开始-----------
+--------------------+----+
|               namea|ageb|
+--------------------+----+
|zs123456789123456...|  20|
|zs123456789123456...|  21|
|zs123456789123456...|  22|
|zs123456789123456...|  23|
|zs123456789123456...|  24|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  21|
|zs123456789123456...|  22|
|zs123456789123456...|  23|
|zs123456789123456...|  24|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
|zs123456789123456...|  29|
|zs123456789123456...|  30|
|zs123456789123456...|  20|
|zs123456789123456...|  20|
+--------------------+----+
only showing top 30 rows
-----------show3操作结束-----------
*/
3. first/head/take/takeAsList
/** Prints the results of `first`, `head(3)`, `take(3)` and `takeAsList(3)`,
  * each between a start banner and an end banner.
  *
  * @param frame the DataFrame to read rows from
  */
def getDataOpt(frame: DataFrame): Unit = {
  // Prints the start banner, runs the body, then prints the end banner.
  def section(label: String)(body: => Unit): Unit = {
    println(s"-----------${label}操作开始-----------")
    body
    println(s"-----------${label}操作结束-----------")
  }

  // Column index 1 is the second column of the first row, read as an Int.
  section("first") { println(frame.first().getAs[Int](1)) }
  section("head") { println(frame.head(3).mkString("=")) }
  section("take") { println(frame.take(3).mkString("=")) }
  section("takeAsList") { println(frame.takeAsList(3)) }
}

/* Output:
-----------first操作开始-----------
20
-----------first操作结束-----------
-----------head操作开始-----------
[zs123456789123456789123,20]=[zs123456789123456789123,21]=[zs123456789123456789123,22]
-----------head操作结束-----------
-----------take操作开始-----------
[zs123456789123456789123,20]=[zs123456789123456789123,21]=[zs123456789123456789123,22]
-----------take操作结束-----------
-----------takeAsList操作开始-----------
[[zs123456789123456789123,20], [zs123456789123456789123,21], [zs123456789123456789123,22]]
-----------takeAsList操作结束-----------
*/
4. collect/collectAsList(慎用):获取DataFrame中的所有数据,会将分布在各个分区的数据全部拉取到Driver节点上,数据量大时容易导致Driver内存溢出
/** Prints every row of `frame` twice: once via `collect()` (joined with "=") and once via
  * `collectAsList()`, each between a start banner and an end banner.
  *
  * Both calls fetch all rows of the DataFrame, so use them with care on large data.
  *
  * @param frame the DataFrame whose rows are collected and printed
  */
def collectOpt(frame: DataFrame): Unit = {
  // Fixed: this opening banner previously printed "结束" (end) instead of "开始" (start).
  println("-----------collect操作开始-----------")
  val rows: Array[Row] = frame.collect()
  println(rows.mkString("="))
  println("-----------collect操作结束-----------")

  println("-----------collectAsList操作开始-----------")
  val rowList = frame.collectAsList()
  println(rowList)
  println("-----------collectAsList操作结束-----------")
}

/* Output (with the corrected opening banner):
-----------collect操作开始-----------
[zs123456789123456789123,20]=[zs123456789123456789123,21]=[zs123456789123456789123,22]=[zs123456789123456789123,23]=[zs123456789123456789123,24]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,21]=[zs123456789123456789123,22]=[zs123456789123456789123,23]=[zs123456789123456789123,24]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,29]=[zs123456789123456789123,30]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,20]=[zs123456789123456789123,29]=[zs123456789123456789123,30]
-----------collect操作结束-----------
-----------collectAsList操作开始-----------
[[zs123456789123456789123,20], [zs123456789123456789123,21], [zs123456789123456789123,22], [zs123456789123456789123,23], [zs123456789123456789123,24], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,21], [zs123456789123456789123,22], [zs123456789123456789123,23], [zs123456789123456789123,24], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,29], [zs123456789123456789123,30], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,20], [zs123456789123456789123,29], [zs123456789123456789123,30]]
-----------collectAsList操作结束-----------
*/
这篇关于DataFrame中的行动算子操作1的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2024-04-26 敏捷开发:想要快速交付就必须舍弃产品质量?
- 2024-04-26 静态代码分析的这些好处,我竟然都不知道?
- 2024-04-26 你在测试金字塔的哪一层?(下)
- 2024-04-26 快刀斩乱麻,DevOps让代码评审也自动起来
- 2024-04-26 2024年最好用的10款ER图神器!
- 2024-04-22 03-为啥大模型LLM还没能完全替代你?
- 2024-04-21 01-大语言模型发展
- 2024-04-17 基于SpringWeb MultipartFile文件上传、下载功能
- 2024-04-14 个人开发者,Spring Boot 项目如何部署
- 2024-04-14 RAG应用开发实战02-相似性检索的关键 - Embedding