From 030b99b5448e18135cd205831d45c87d7d8a441d Mon Sep 17 00:00:00 2001 From: Jian Xiao <99709935+jianoaix@users.noreply.github.com> Date: Fri, 13 May 2022 10:25:44 -0700 Subject: [PATCH] Add a classic yet small-sized ML dataset for demo/documentation/testing (#24592) To facilitate easy demo/documentation/testing with realistic, small-sized yet ML-familiar data. Have it as a source file with code will make it self-contained, i.e. after user "pip install" Ray, they are all set to run it. IRIS is a great fit: super classic ML dataset, simple schema, only 150 rows. --- python/ray/data/examples/iris.csv | 151 ++++++++++++++++++++++++++ python/ray/data/examples/iris.parquet | Bin 0 -> 3141 bytes 2 files changed, 151 insertions(+) create mode 100644 python/ray/data/examples/iris.csv create mode 100644 python/ray/data/examples/iris.parquet diff --git a/python/ray/data/examples/iris.csv b/python/ray/data/examples/iris.csv new file mode 100644 index 000000000..bf14e161b --- /dev/null +++ b/python/ray/data/examples/iris.csv @@ -0,0 +1,151 @@ +"sepal.length","sepal.width","petal.length","petal.width","variety" +5.1,3.5,1.4,.2,"Setosa" +4.9,3,1.4,.2,"Setosa" +4.7,3.2,1.3,.2,"Setosa" +4.6,3.1,1.5,.2,"Setosa" +5,3.6,1.4,.2,"Setosa" +5.4,3.9,1.7,.4,"Setosa" +4.6,3.4,1.4,.3,"Setosa" +5,3.4,1.5,.2,"Setosa" +4.4,2.9,1.4,.2,"Setosa" +4.9,3.1,1.5,.1,"Setosa" +5.4,3.7,1.5,.2,"Setosa" +4.8,3.4,1.6,.2,"Setosa" +4.8,3,1.4,.1,"Setosa" +4.3,3,1.1,.1,"Setosa" +5.8,4,1.2,.2,"Setosa" +5.7,4.4,1.5,.4,"Setosa" +5.4,3.9,1.3,.4,"Setosa" +5.1,3.5,1.4,.3,"Setosa" +5.7,3.8,1.7,.3,"Setosa" +5.1,3.8,1.5,.3,"Setosa" +5.4,3.4,1.7,.2,"Setosa" +5.1,3.7,1.5,.4,"Setosa" +4.6,3.6,1,.2,"Setosa" +5.1,3.3,1.7,.5,"Setosa" +4.8,3.4,1.9,.2,"Setosa" +5,3,1.6,.2,"Setosa" +5,3.4,1.6,.4,"Setosa" +5.2,3.5,1.5,.2,"Setosa" +5.2,3.4,1.4,.2,"Setosa" +4.7,3.2,1.6,.2,"Setosa" +4.8,3.1,1.6,.2,"Setosa" +5.4,3.4,1.5,.4,"Setosa" +5.2,4.1,1.5,.1,"Setosa" +5.5,4.2,1.4,.2,"Setosa" +4.9,3.1,1.5,.2,"Setosa" +5,3.2,1.2,.2,"Setosa" +5.5,3.5,1.3,.2,"Setosa" +4.9,3.6,1.4,.1,"Setosa" +4.4,3,1.3,.2,"Setosa" +5.1,3.4,1.5,.2,"Setosa" +5,3.5,1.3,.3,"Setosa" +4.5,2.3,1.3,.3,"Setosa" +4.4,3.2,1.3,.2,"Setosa" +5,3.5,1.6,.6,"Setosa" +5.1,3.8,1.9,.4,"Setosa" +4.8,3,1.4,.3,"Setosa" +5.1,3.8,1.6,.2,"Setosa" +4.6,3.2,1.4,.2,"Setosa" +5.3,3.7,1.5,.2,"Setosa" +5,3.3,1.4,.2,"Setosa" +7,3.2,4.7,1.4,"Versicolor" +6.4,3.2,4.5,1.5,"Versicolor" +6.9,3.1,4.9,1.5,"Versicolor" +5.5,2.3,4,1.3,"Versicolor" +6.5,2.8,4.6,1.5,"Versicolor" +5.7,2.8,4.5,1.3,"Versicolor" +6.3,3.3,4.7,1.6,"Versicolor" +4.9,2.4,3.3,1,"Versicolor" +6.6,2.9,4.6,1.3,"Versicolor" +5.2,2.7,3.9,1.4,"Versicolor" +5,2,3.5,1,"Versicolor" +5.9,3,4.2,1.5,"Versicolor" +6,2.2,4,1,"Versicolor" +6.1,2.9,4.7,1.4,"Versicolor" +5.6,2.9,3.6,1.3,"Versicolor" +6.7,3.1,4.4,1.4,"Versicolor" +5.6,3,4.5,1.5,"Versicolor" +5.8,2.7,4.1,1,"Versicolor" +6.2,2.2,4.5,1.5,"Versicolor" +5.6,2.5,3.9,1.1,"Versicolor" +5.9,3.2,4.8,1.8,"Versicolor" +6.1,2.8,4,1.3,"Versicolor" +6.3,2.5,4.9,1.5,"Versicolor" +6.1,2.8,4.7,1.2,"Versicolor" +6.4,2.9,4.3,1.3,"Versicolor" +6.6,3,4.4,1.4,"Versicolor" +6.8,2.8,4.8,1.4,"Versicolor" +6.7,3,5,1.7,"Versicolor" +6,2.9,4.5,1.5,"Versicolor" +5.7,2.6,3.5,1,"Versicolor" +5.5,2.4,3.8,1.1,"Versicolor" +5.5,2.4,3.7,1,"Versicolor" +5.8,2.7,3.9,1.2,"Versicolor" +6,2.7,5.1,1.6,"Versicolor" +5.4,3,4.5,1.5,"Versicolor" +6,3.4,4.5,1.6,"Versicolor" +6.7,3.1,4.7,1.5,"Versicolor" +6.3,2.3,4.4,1.3,"Versicolor" +5.6,3,4.1,1.3,"Versicolor" +5.5,2.5,4,1.3,"Versicolor" +5.5,2.6,4.4,1.2,"Versicolor" +6.1,3,4.6,1.4,"Versicolor" +5.8,2.6,4,1.2,"Versicolor" +5,2.3,3.3,1,"Versicolor" +5.6,2.7,4.2,1.3,"Versicolor" +5.7,3,4.2,1.2,"Versicolor" +5.7,2.9,4.2,1.3,"Versicolor" +6.2,2.9,4.3,1.3,"Versicolor" +5.1,2.5,3,1.1,"Versicolor" +5.7,2.8,4.1,1.3,"Versicolor" +6.3,3.3,6,2.5,"Virginica" +5.8,2.7,5.1,1.9,"Virginica" +7.1,3,5.9,2.1,"Virginica" +6.3,2.9,5.6,1.8,"Virginica" +6.5,3,5.8,2.2,"Virginica" +7.6,3,6.6,2.1,"Virginica" +4.9,2.5,4.5,1.7,"Virginica" +7.3,2.9,6.3,1.8,"Virginica" +6.7,2.5,5.8,1.8,"Virginica" +7.2,3.6,6.1,2.5,"Virginica" +6.5,3.2,5.1,2,"Virginica" +6.4,2.7,5.3,1.9,"Virginica" +6.8,3,5.5,2.1,"Virginica" +5.7,2.5,5,2,"Virginica" +5.8,2.8,5.1,2.4,"Virginica" +6.4,3.2,5.3,2.3,"Virginica" +6.5,3,5.5,1.8,"Virginica" +7.7,3.8,6.7,2.2,"Virginica" +7.7,2.6,6.9,2.3,"Virginica" +6,2.2,5,1.5,"Virginica" +6.9,3.2,5.7,2.3,"Virginica" +5.6,2.8,4.9,2,"Virginica" +7.7,2.8,6.7,2,"Virginica" +6.3,2.7,4.9,1.8,"Virginica" +6.7,3.3,5.7,2.1,"Virginica" +7.2,3.2,6,1.8,"Virginica" +6.2,2.8,4.8,1.8,"Virginica" +6.1,3,4.9,1.8,"Virginica" +6.4,2.8,5.6,2.1,"Virginica" +7.2,3,5.8,1.6,"Virginica" +7.4,2.8,6.1,1.9,"Virginica" +7.9,3.8,6.4,2,"Virginica" +6.4,2.8,5.6,2.2,"Virginica" +6.3,2.8,5.1,1.5,"Virginica" +6.1,2.6,5.6,1.4,"Virginica" +7.7,3,6.1,2.3,"Virginica" +6.3,3.4,5.6,2.4,"Virginica" +6.4,3.1,5.5,1.8,"Virginica" +6,3,4.8,1.8,"Virginica" +6.9,3.1,5.4,2.1,"Virginica" +6.7,3.1,5.6,2.4,"Virginica" +6.9,3.1,5.1,2.3,"Virginica" +5.8,2.7,5.1,1.9,"Virginica" +6.8,3.2,5.9,2.3,"Virginica" +6.7,3.3,5.7,2.5,"Virginica" +6.7,3,5.2,2.3,"Virginica" +6.3,2.5,5,1.9,"Virginica" +6.5,3,5.2,2,"Virginica" +6.2,3.4,5.4,2.3,"Virginica" +5.9,3,5.1,1.8,"Virginica" diff --git a/python/ray/data/examples/iris.parquet b/python/ray/data/examples/iris.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4c3f1459412bf03b654a295c2a9e69281ec1636b GIT binary patch literal 3141 zcmchadu$X%9LImV-M!AWyRdidUiaXDBf|C?+HyxNB9Ywg^+5}5X|LLspuOH*A6nX@ z4_fda(5iqS717`$AZU5SCV~lx{D(jY5Fiaj0)!BvME)&88W2MK&0ZfToF*oQN%!VA zzxmDV%N%*j8pC32z6Spjyj(8x1Fs`lj<3?rmy7tS*bXVv&Lq0y=V z&sdxwi!Nf(a(D((L}HO3P1L|95=b4Zt)d%aByfo3U`PLSQGscq3++u9o zF95tVvOWQ;L6$M4Yfd5GBNdiA5_h=xZ-940qMrFtBsDojDyfJ3pmdxOrrgymFJx3| zS^42rH6P5KR<&xFuXw#NL-@4j{m)93lPk>|Gq#vql_yI5X}#Cd^{;klR#Im%m0dfq zYA9{XP}jzkP(kg^*4l-|bv&q7c!!*1&*i{mSA@=RB+#1O8g6UqYL;2sAZt6FXuHU( z*IAYPmtEO2F%z830w#x6K&uf}{*@__vkja-F@VrGLE{r8AW^7h11b=a5W&DW6%(I~ zPa=X75r!m62pEq6OhIoFJ`$otFR*Bv&`!W89&11!BT92PoRSufVj5!PEAcQ{G=<<2 zX^FuU>&2etk)|-BqtJFlJzEt-9~a&!M)pXKub)_0Z@+{gL7Vq>4@gVfB+ z^(?4Qk|&uYN161rgvOEF;%!$s^`KeZpE$uJcCnKuE!GQhnH=WjtqCP^mBHj{RAQrE zsx5Ngi-5=jFX~BnEhnVHND1>{WiyQIsUas+ps8JDm30O7rBGjG4Nt)ROR*`T? z8viDjGgWjVW%CBaL_k117KAC`gBnG95vtRPI4NMsun|8hFRDhHScE#BhCOyen-c1m zYI+y?icp&p+O&_%k;g!%{+k}BPka<`@wd~&qn>nrPK z9qik4aNF&HZK0Mo?)bCQtFvyV@7w+R=9#qvyZ3h=Sbyk{3DnV)N%le{-1W2`iS3|C z{ZmpeiWBf-H}DVJiVOc1E8j*zqE_*KJFF?ZH$Cgd8W4#Oz7WAqfjEm0xAd8zP5 zMMZdE91ysV&>rGx7(oPg5zk{pa}VJ?!qdTN{0e$HTpVcTO@z_F%z(ykcjY8#jYeVP zE0T&3fJ0ExEA*HF>RB&(Vik$accC8KCgj7gPT&9t%GCtE4OX;wbq?vpv%md#!5K%l zE$_hW;EjsqN4qZRBzHLVEs}M=KD1!*>6?9PE}aW4=`OR`gPxx=wjbJgq-fsY4<~Bv zpzfPCNhLW-B}S1$yzM%to=;QH$Oz@x#ZoBGdgCi{nOs(a9}$XVZodgfRffCTI|E5* zRfRh`TY~Ma?Hy*?YUyZdX=@1v@Md$!&X~{7LU5TUdKFW20vs{rMNA3((6Av0F(FpH zIKv?eV|Rbz+CWE3xNBYX+P}=$ZZPT%hx*5KNBrsHoG8}bnhT1lGuRwn9gveiPMXX9 z{hJbX#a?M+L9qvYEGQ<+APYFt_zLGji39h6Unjf6zVdl}z@FEo{M;^p=XIw4=Tm0u zpMm66V(Uvw1+Y65LHGMtR4pi-!NiWp(COxmo@cA&H zlv;VZUQBCjkJs3G4WInCslT26EHwtXsYxf+3-a+Li>OgpK-<0^`fHff(Dfibz6G^4 z%OZ6({#?2g9B1@5l;cnuUQueSuZV_EEEU&7`-*(6p{0eLC2gTdU6H?CFYFk5U$Nfy z)OcgjG2`>;bmQY!_qH~y4!Hx=r{}G}N#;{SFPm6@e*E=!Vwi7zU5&4~Aul&4Q0>P# z^5#v9$IW0e@zu3->qZ`Np7DJ9^j`Wt-N>JjFQXZa3ofYYsjcqmZdh8?U6+GP&)0@BR@ literal 0 HcmV?d00001