From 03250259177a37bd9fb21736c702571af9a63ec1 Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 1 Dec 2016 20:01:10 -0600 Subject: [PATCH 1/2] API add dtype param to read_excel --- doc/source/io.rst | 14 ++++++++++++++ doc/source/whatsnew/v0.20.0.txt | 6 +++--- pandas/io/excel.py | 17 +++++++++++++---- pandas/io/parsers.py | 4 ++-- pandas/io/tests/data/testdtype.xls | Bin 0 -> 22528 bytes pandas/io/tests/data/testdtype.xlsm | Bin 0 -> 8517 bytes pandas/io/tests/data/testdtype.xlsx | Bin 0 -> 8501 bytes pandas/io/tests/test_excel.py | 27 +++++++++++++++++++++++++++ 8 files changed, 59 insertions(+), 9 deletions(-) create mode 100644 pandas/io/tests/data/testdtype.xls create mode 100644 pandas/io/tests/data/testdtype.xlsm create mode 100644 pandas/io/tests/data/testdtype.xlsx diff --git a/doc/source/io.rst b/doc/source/io.rst index f524d37d0de60..38743a4f931a5 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2538,6 +2538,20 @@ missing data to recover integer dtype: cfun = lambda x: int(x) if x else -1 read_excel('path_to_file.xls', 'Sheet1', converters={'MyInts': cfun}) +dtype Specifications +++++++++++++++++++++ + +.. versionadded:: 0.20 + +As an alternative to converters, the type for an entire column can +be specified using the `dtype` keyword, which takes a dictionary +mapping column names to types. To interpret data with +no type inference, use the type `str` or `object`. + +.. code-block:: python + + read_excel('path_to_file.xls', dtype={'MyInts': 'int64', 'MyText': str}) + .. _io.excel_writer: Writing Excel Files diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 6fe0ad8092a03..06517c1489861 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -22,8 +22,8 @@ New features ~~~~~~~~~~~~ -``read_csv`` supports ``dtype`` keyword for python engine -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +``dtype`` keyword for data io +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns is now supported with the ``'python'`` engine (:issue:`14295`). See the :ref:`io docs ` for more information. @@ -35,7 +35,7 @@ The ``dtype`` keyword argument in the :func:`read_csv` function for specifying t pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes The ``dtype`` keyword argument is also now supported in the :func:`read_fwf` function for parsing -fixed-width text files. +fixed-width text files, and :func:`read_excel` for parsing Excel files. .. ipython:: python diff --git a/pandas/io/excel.py b/pandas/io/excel.py index d3171ceedfc03..6b7c597ecfcdc 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -87,6 +87,14 @@ either be integers or column labels, values are functions that take one input argument, the Excel cell content, and return the transformed content. +dtype : Type name or dict of column -> type, default None + Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32} + Use `str` or `object` to preserve and not interpret dtype. + If converters are specified, they will be applied INSTEAD + of dtype conversion. + + .. versionadded:: 0.20.0 + true_values : list, default None Values to consider as True @@ -184,8 +192,8 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0, index_col=None, names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, has_index_names=None, converters=None, - true_values=None, false_values=None, engine=None, squeeze=False, - **kwds): + dtype=None, true_values=None, false_values=None, engine=None, + squeeze=False, **kwds): if not isinstance(io, ExcelFile): io = ExcelFile(io, engine=engine) @@ -195,7 +203,7 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0, index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, thousands=thousands, convert_float=convert_float, has_index_names=has_index_names, - skip_footer=skip_footer, converters=converters, + skip_footer=skip_footer, converters=converters, dtype=dtype, true_values=true_values, false_values=false_values, squeeze=squeeze, **kwds) @@ -318,7 +326,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, true_values=None, false_values=None, verbose=False, - squeeze=False, **kwds): + dtype=None, squeeze=False, **kwds): skipfooter = kwds.pop('skipfooter', None) if skipfooter is not None: @@ -501,6 +509,7 @@ def _parse_cell(cell_contents, cell_typ): skiprows=skiprows, skipfooter=skip_footer, squeeze=squeeze, + dtype=dtype, **kwds) output[asheetname] = parser.read() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 580a3398bb66a..ef839297c80d3 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -18,7 +18,7 @@ from pandas.types.common import (is_integer, _ensure_object, is_list_like, is_integer_dtype, is_float, is_dtype_equal, - is_object_dtype, + is_object_dtype, is_string_dtype, is_scalar, is_categorical_dtype) from pandas.types.missing import isnull from pandas.types.cast import _astype_nansafe @@ -1329,7 +1329,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, try_num_bool=False) else: # skip inference if specified dtype is object - try_num_bool = not (cast_type and is_object_dtype(cast_type)) + try_num_bool = not (cast_type and is_string_dtype(cast_type)) # general type inference and conversion cvals, na_count = self._infer_types( diff --git a/pandas/io/tests/data/testdtype.xls b/pandas/io/tests/data/testdtype.xls new file mode 100644 index 0000000000000000000000000000000000000000..f63357524324ff6ff97f4b1a15b6a371957ec6e1 GIT binary patch literal 22528 zcmeHP32YqY6@If`+v_8V?Ig}gvhk7FjvXA^3FPuRF;|3h2tq@JX>jayY{ju7$A&~A zjcGxuN=m_@K_za12Gj!z4M#)O&;$vADw!5hC?$GdnZh zZE4l2Gpm1g=FR`U_vXF#zyH|Ze(C4MZ#{5q_S<4Rnk7p(R|dVeRkdt`lqFJ)4_M87HDNPTP1wQ_ky=@46{f#|Ri0AKBr+073SBOX(UOuv z4gQ2&gIS3-bIy?mrBq}ej`q4;yV~ozy4u4%Ju7$ZTDfP>TQUvq6ZvZ_tiJWrsW9nC zv_Ab)v;%FcNtj>y5wO|$$v8cDpVj^e`uAPv-*cfq>Ow!_LVwJK{)h|x8!q(QUFdH5 zxZyvcaHhj7Dn1i-_@)b8*C(~Uu1_lcK^L53F7zi|=s$6xf7ylpBNuw6`aGp@3R3K& z$H^3fes^*4Jq7m^Nbs<#U!0(Kcb}Ypa=rwQD|$9kjI#6JQ9&I>d{(>AYhCCyj`XR( zRQP|1!;6leZ_pF|)#EwWpuZxUaXBBuFQq5Og*uK-$Ls6#yd*u0OT4~~Zj(F7sGqQp z9{(7I8u&Nha<+j}XwVaSJ{-4Oy#82macMy*?0m1HPd4fUU$?Vv|7>)ZPIl%5$H_j^ z4Eie=0ZMbi4kZTN_FjU7pT~ZXj69HMZVpY8n`yYj5s_b(-xS;*s+0W|Ix7TZ5}31a z#nC7GR`g0WlhGptQW;n!BL;mGn1KW;l97a9rVQ-rGLk@FWyHW>&WM3d$%ujBpAiGw zT}BK{G#N3l#bm_5CYccf+gL^nOp2ZiYi;d;*&xfkJp@%liFPyO^Z{Zxygy_IVaiPd zgkk6o!uFm92t(5ygh@CJ5QeKe2op;hAPix55O&9DfH17xK`^kL7-YQ7+dG(@qyysZ zotf!@czb77dLZ83L5G#j>v(%-PI@5T-odoaqzP`UeTm+78XCR5gK1wnAl}}YmmY|> zchK{uqlve7!s&r{duKs4A8AXJL9E-riZ1 z9*DPhsuCa^FO5k~%rXvR+u?)Q3qOk`)8k#X&Z`utH_J{0Bap7K9iUT_yj&JtRLl31W6^TSrlrqx{rAX7EMGU3P zwA4}TVpR7W7ud(iwV5*1Wy-K5Q-%u@4D>b~XfbT+u%sT<^e&hTm|a*Ij2L_VPv#i2 zk8Q9jWlAG-sYNPdheYo*f~B~LL1Gbakj8XC?A?vsuT20+213f~q=xe{FeKYpByyv( z5o6mWm6Cg$kdi?RnQ(C*3X0&$FAVnQUYd6zUeYOI1%|44b`^+gXn$wM8Ml;P&%A9Tt zrOdR{6oeIGQ{<&sDwScCg~1-G+Wycrw(ejx7=rF|8QR_6RL#xT=nhgfS(DI&;Ra_| z?U~BZoM529ly^N8D3Ky4Dh>6O;kxkJ&dzB6(9+S+1{r178$Yz*yx1hH;@3;1gryE; zt#slOm3|rGYB}CUlS0W2M!V0Pdq0-VYs!Ltm0ZlAm}0w_hEy?S9%A&&rNm@MyU(Bb zwX>KKyO_pQF$+D!=($aa$%%Hq`~DlwVy4=~G^L8E@esr87dKjNwEOIve{dEv%`RqH zs+c7nVwf}IVnWgG(|`Q4vzY02G0RiMG9IQYiJyO@r<*W1$HqG zk=EoP#vEx4jUS%;s*{*PEdOJoo9H*!+WO54V!5t;rO2jRo^b{(Pl6(%?4S#5pynvT zZz!8LID=LsK@mT8&`KMqIc7wT{P?rZpp{8bM2j7?+6HQl5Rs9tZ#jc5OoAdd?4Wfv zP;)$pT=~fJ&Y+8upoj!J=u#V~+4~}YJA9cFXh^C!j$7B!H5yuB^}IGGmL=;j)9Xs~ zz}XzGVHJU+BVKcXiWu!8pWb-%C)AqzwUFhEM`>1V1U^A*VqaDx2>@mb*O5-J!q~?B2l77p*5{>(-9spl+3|ZVq-jChLafI`jnBCwS^cg5X(b z;|az>;2Flgut7EU?Ua29jwK_x;8g+{4a9(40pU#elS03ICGy2O11`w#L zthn0jSWqs9(FSax%Ebr+3&ISn$r$frLG6lYM^|tEjxgp5Hbt|FVA(665M#d=itd1d z`W!)p02<8v_4{==hGH%>(-YyLz+_A^OyJ0UcJN{ca6SLIfX5==KFKjW!5hus$bok7 z5(jWS54wQsLxX?H>@=0n|Mxn{E*3gpW&ZVOu9zk3*=Fl=>MU>e?M zJuJH$u}G&Un?Dyn45x90$5MHnC4Pdk5Grnc4VTHuY997K0%GI9P;@l3-HH*&M|&a05R?+U zpOJaFuwtY%E{Y@*t$t^uH2uyP@|htm zJ|`imiGF8jqTd;s=yyz*D@U?ny9%oc+1YT!X1w!YV&YGeF;rJERJZgF^}$+8tz21C z;GQjrjv+XX_DYR*jS#C9#J~_Ez_#{~HA_Lz;JWvNDyS^$3$(kDNA$Nf?igalvIf1Q zZ0#8AXUfvkYs9K(pa7YAD-R~Eh_bgkTBS|qs>Z&!@Z7JzJ5Z+kUV z-lwiM;N6)FhY@!xV8vd!QAJR)23yb$^rBo&SuQuIEy}myt&VVU8j*2zPc%BT^xn`+ zxz{994_kd*=P0&yA2mHA-$8`2hR?HF2t{PaIZZx7XTWMKAa$T@N9jcALK3R4U#iXx z>fET#O)(4tLQ()-U`Oc;m7?Gxew4<)19BKT95y{!_d89FfOf=0gB&>s+CdYI0Nf>^ zZ%r?G}cg3<4tUufY$rMWZ@?5i!1{ehh<*v7{Q{upX<~|ES{{Xc3bA@IU~c^(9l7FVLVS ztEaSyzV;sqa3=+?YiVjYd5Vz68#|4bwbS776dG+~uJ^I>mfHG-C;FWR`%{K~7f{|X z?y_)4+-I;+pLMu7jdduCb&6D-(s-Sqs>9zdsYfu5TGa9y+Tv%R`et+u4fs?@ zGuAYJs9ey3+B}GMJ0kmbi$C+RpBitERZu+EpfIiiJ3=#Lhp8IEkqZl1m+0+)X#ck3 z6QrQ%}*ucZFo}q9VH(Su48bhV<&DEomh$jU?pnAZcaET9g34qv3ONFXK;`}aHu;W`A@BlIOv zDFlg-TwY-HtpkHQ_i(jzk6H#|ZU68s;9J1AfNufc0=@-&3-}iBE#OB87f%)j5`7UE2fQpr}ERj3)`C>-KhW929y<+k z+TYj^l#ovy(q%l#O~^R1Mfk>Z z@44q3&-wm>d)I#UUeA8ke&1)WwSK?#76goe2?SsRZ~*`S6M(6t*2@_U0Pw~H04M;s z==##mP9By{9>$tpE|%_woKQyx#%xSihq-|Kb%WjvG+z0Fla`DO^dfJ_=hX zm%!!UMeiqn@Kn6Did99kWz`jYd8RxT3W&co;dEHOk@3~F1Sh9E#Q}@Veh+_YLo6Xs z!)V&csPt^h-EV|PdCbI~%^B^fvh9)A#=X}LCc;sJ^yezh)Mky)u#yp$@3``oSyC#)P3)K{gef1s z_lF!l3e>O^#3_2bOf+M%!k9&OL05`|kVtd{c`JkvaKMUNqbWd*lK&Fgzg!36S9z`xK{FYd9pkB9%?xNXw z3EHHT#x4?GS|gab9XpchKb?@bMfBvAu=37WGG(v3ne|MGPp<>h$py8RhK;aosab~G zTvNcZX##Oo;P-R}!}!;rs- zGFg8oNuYRK;t^^^>QN-22H>JW9k~9*lNZiz_GZq`_CL+p@0>wH*))`v|L#_-t^)1^ zkv0)rhk{px37|Yj)jTW?|6ykLA{~Q6qpOc8Sb?=)bsp|pLhj> z;-u|iei%8XH-*EV2$u8E(GUi%*)EbkNFIxVd}sS>(JldTiaSM}cuX%x22g1IoQ?TT zXinZ;nd7=KIG^1x$dkv;{UcpJ3Nr(hs{xlG8av3K%9y*aQ|xy^UJRp0 zckmQNiM#-#n-feA&)Z~AaSt1>tW>;)?b~uL3D;UO~Px3v~t{799OFrq7X7-|eGBdty%o0OhlpPwqs&aT1?$F4wS`NGoz89g-yC61D zl%u`+REP`7ZY!X|P6pTI94bM)sCmcY%zl7Ih+`SSwxtO5)@t&r64j|$(rIADHStkQ zC7r#iC~6;q{iF&Bf>uG(Bl?YNPzFE~avxG7S--v_y12kGciM0FVN+;AC-ofekm z6JK!pd5q!l$s&+?E4OvfX#9z?n9? z8eL7?8}dOIGymOdv<;=3VA|HZTzKTXz$ZI5ECqUUMN44U{g$n_Ov2&{9lKeG3qtt~ zKDklO(Cc-{!;hKRM;-?rr@Fm{6iW3;BjeW>)|)?kmLL95Qr>%$D#Uw-*vw%1sW+hk zlk3Km=vSuh8!9%q+$-?A5~bU8eaP|g?#F`SGE0CtVfwx8;pTp(C>*T3Ldzq zJV)yy&nI?d@z|Gv2frKPsK_q|?_`o~o$9$yn&3AM(y1q3t%+l?&lvN+5QC@C$S5L> zXe!3pWzeCI5Ub=c#DM(RC!~Be>^2UDj%D&15+*ddGo0ltU1rNSw8=xqxWCRk0MglV!j6zK+Zi>Ypn^@2Dw? zQIX%7*Mr?Or9!p3?la+TYSe{&l`Ph*t}kVjhK8?sW<&!QPwEs(gY?NB9b<~4HRI{) zaKtCvCRZK@{U5!!A7ledhBlAf_Zr!&_k zF-fO?vK1x&K!OmA#kDQjcZEq{FL-<3PPKD_a2DNg?rPOTSaQ#1w3t2n8+6pILA{}j zd)*nJtnx(H(f5Z!*g=ZGT=k`{N8Li4rivgohe*3lko2Hn;lQDy{3~jOI_d z>2hdblqbfsUn$wvl{arL6ZNhXX`CyWH_aG4-^aE%7|=LBjhJ=o>Y{-R+IaK5oFaCt zasOtn@UB9I%Li-uVJ@w>q$FQ>4w!3nqacOS9#4EVn+QN;p)(3BFqOL$3So0vK|Ew5 zhQKvAKTFz{O>jPHySX&(GSE6cgvKwUr~3l?3*`04?@LSaYLR?HpUwn$9UaaIa=ptz z-Ra_fu9xSbyv=ZPIix}2W4P=@wwQ$%Z4;W7>6;PY)D0?GMLb;3+3ZYu3>3m% zq@Q|3DbkwGWF>A8T-yy}%&a^QsKE(+TZ~gi#kQ?`VU#A+>Nu<$Oz;8i+g26+t#5>5 zy)Cs=K-ce74Bw}mRwi5>?V}`BUPh%?`|`59O@a}=$Ctm)jKB5b>J=%;jI+Ockn;fEFZ(D3X`XI=Nyu16|y#? zBR6iXH@=!*mLH;GO@3yT(k;6~=Y`1>HcJ0Y{9ND#ugp>~gUjRb1rj#bd0#)KTD5!f z2*rR+L5$A_YmV&;KD6cHEX@xpILY6amPiww$;z9P3THLR;&!ms%H!7i_uP{VwthRr zDzy}mRp>x&Y_ibhRJ{E?PGK{`gO@iwPhayf6HMZ+xOB$u=j3+YPTQa^j$x5yPgW08 z=c9x%!&~%}fg(U}@mIG$Jbli*&xZNJ{?>F8l}JNEbK*6W4!_6$b{}`|K!?{6xlQjp zALJ94Ln^V=FnVmjmPuH(&)O{o+A}qU<7T^FY@_<-)Jh6L3Bgr9VLX+ksRIx`vFP}L z=7-1hbLHu~P%Ev3Azt5Mj`^0-hMCr2kH}Uyk70dDN@~$M%lU|)V1?U7VQ!tBZHXTB zlKdy*u`%U(DjOFT3VPl>MXR^4%eXQOehUk5%InAwl2jua`jn?}$?|V|{B;Qv^%!(o z+hoycsPETa1{9t%cHbck?YDy7=}Ysx|FoNVFnrLQSB}{ln%c(pGCC^RRr}<4U{=kn zY6y0Epzhi)Y-NrJ8@%sX`7y@IWwa~7sd3KC^5LX%MO|ThgiW5gizZD`v5IKsbGasEdEn0N^YVIowhZwRv2tZRxv!== zu!A#nGQQ-r+wl*b(rGumiVYjkvf>atTx@{a^5vyN6*qR1JuGawg>}PJws#1SiR$7I8QRV! ziU~`MvdFd{k2NLV(Z_=n1t}$$TC<|6tG(84ZHR7#$#Y{{zcWwh65^_PEGwVv@Hvk$ z!V>gCTkjtI=6d`Bn_$ZWmNhQLcr&V-5zW!>c;VHF-m@4?X^Qya9(fZOoSK7K_?Fy6 zbLhkt%DVwJfx)s+B1tUPob?*Up39mc;eieWzcSmTctliYDMNt{X*rH%pUjf*VE@IU zrEYmz^e%{HrGGGNfw?Ed(}T!h3I+=>!}ZiqIpi^MvHmjBG{T!tv&FLUZua7nMMjNS z(oSK*bx@N+F65X%UvjA7EIGr@fOxUIY@}{-HjF>_!0e=5-w_)J=)zgR$RhPXii$5~ z>UQJ&=kE}Gvb2=BQ+!EHcf*XggkY5sEGF*vEa`|{&# zPtQ`F&pWl)3T>BTnJQDIJRwm$PKV5 zj0CfG20u$6v1u^GEdybH*Iglj>Z-E~!(WO^2;PcR_f4!5*)|vcz`;GT`P!v4gg6Ic zU~3U5i#w@%pRDq%mN5;<@9soN7QW~0zDh`U5d6+WLOKz`(6%=g2o67zE+YeeCDcV% z)+@9mrr|5zd%+))$XHLGcLeKz<@9leuZ&`~5F|0%=YmNuYKNNG!9qw5iw`qaU;U7U zpJB*itVb4#eZ6vu?S{^0g7BAt+E0h~=B3Z|np&NT$b9z^1}AS7YeJ;pmXXuWwAWO~ z9{RR=s~;j9c10N1Z{~vY7L~rbF!k|*Oa{APap^_FV=&q-15AJW6y3*JcT!yK1A z+qmrbzz2~7Yws_%c_JHiOVhk!HK<`n(7MBwzAm?_{tZc>!4>UL;Z2g}f&o@nNbJkc zppj$94rFCvG+|{lPpt(e5p^nb=EX-e`~|dsIL>&?qQP@yH9r1n;0s7kg>_&+C{n(z zXD=-xkOBUE<`ev&okvnQ7FKdf7=e@^cesIsv{<)4xwf@z`A5Ep2O8d3q9QsX6kn+R z#h0HEox6>trH4D$Z{2SWYh0X4ONwe5YsHIHn#qv{7RPg zef7oV8J!3Lb5{H&mcz(uy0w*Ph8TmT>}h5M#8U}JQU-6rU@dzQas0hzvLl4tTsOT! zX)oz?=_BjZ2sk7zgn+V)iL#QsIRU zg>`3HWalBJ>EMRBzhU9Jyv!58=iXx9f_omvE$&QkciqiS84zqtV%&T2{(48~caz?obqwRa( zVxH-@YNEUx|2D>#N4}EQgeIk+?;I&FVS@=RbL-R9lGD|-)m?^jhqD5WpQJh)K;1VT zRIqybwnnjtV1)cS!k3KGE5h5yw~6@9>FV`1k(e&mjTh{3Ba{1>VLAQx@(WuS0 zLer--_ntB;Of*BmlMI!gvT!zsxH-GHbD2B4S^m4!^1rkcYUzApB*7gZD)?T|P1fpS zp6$ZNvFD|Is>GNN!)l^@=Gs&E?vFa%ELJYqg|2XWEs1&^M*FzNdG2F%hsjaClHtae zD1#GJJ^W~2MZEXagib4MDk2Cyy)|3b3l__pUe?`OVarFw+KQclaOn03JRRO5G#@x+ zEQs)tzHphNQcO2ap__`M+az`&?{Vj7d#Xy;MF*O=N0uI)7NPLNcALKS4xIWvn>F`q z-0Q8!ps{POOf?o2_Ibtcg6W>`9fsj94h%@DH;!|1j^X%6-Vz(M^=wc%y@4{tiqs|GKvedkByI`M!O)d*Rp|Fm^Ajc zPvfDV77YC;v10J54!9+l-v+G68T+WG(o$Hs1z^cz7R!L+wAHg6vv_MwgiFUd=kKft z>{&WcTYnaqO(aI|D=KeHaACydpGe#SvgLJZsWA{CbLh_ZU0)~@7Ejj1HCDjS6E5FZ z2%9-liWct&DKJQ?+e78JwF$ z;NcH-e3pIgNQ#}Nbj%_`@v01aZX&!&O+oib*H!oFDH*$K%&=Qjyp~cy`TDVUKm>QGA~`1 zUlaTeUo`YbsKnYo7p#BZuHXBAC|^Use^v0;O4{#&Kl^+XC;n7d`&IC-)rCI`4xwC) z|G(DotDav|&VSN`_qPP}ufo4(8~+fNLM<5TC;pau{8huRxx7C#un_)ZX75)8zXl|K zD9A?T27ms-f5MYrMSu0p{tz9z^`EK#%~ShT%U{jtA36X4$2$PP-;C<7;(t8?|17>w c^C$6tABYez7HW3^0DROHfU?^k=zqTbACj%5p#T5? literal 0 HcmV?d00001 diff --git a/pandas/io/tests/data/testdtype.xlsx b/pandas/io/tests/data/testdtype.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..7c65263c373a3a43ee6e7adde9ec0a3abba219a9 GIT binary patch literal 8501 zcmeHMg8uQ5RJrrDr4gas}w+zOLeOI5ZA`*o9S4Ab1=Hd^#SIXDRpXwlU0HFfWkNcFfUO)TKv7 zTc}bpIdIJ9bf?Rx2@Bp0^nH))sS#>X+cu~5cFY~Tv5YBudJQ@z*{C~cks|JCVZ7rL zvOSO75P@xdUeOQwwwJKs$8&`R09;>V03d&(Wu+GQ_8|&u$|%<1p=fFBYH9Dr&GqB@ zFCG7fIrx{M7saZmwS)2DJMxzy$di$oXksa)=aTaEjGDgw3e!ZD5m|R=W|~=Pi8ZN% zFy#CieJ^{bW<(=4k&MTSd_`fTAd%Zu&k^v%a~F>rcUfE$9=Q}PbdWq7IT|@mmQ(a( zd)63rx3DHJQ>ka}_M_oFnNs2bt|!zuWI42O3bEt>y)G5qS+mn(v=J$dZ3MieS}>M%cE*EZy}1lc%wTeanGNN;BDa zZ%)-zq&gEH=_${ETsL#Zk)K`#-(FvuXNMq8dm(bSCoqgYZx(ez{gWgC;;$1AP%~18 zA_+AB8x3mD{U4q@cXG8eb8@o#aeDp188p-phVt^i-HOyzlsdq~jd)if;O8mN$Vn#L zxtX@Kz7k>gRIpF6F!K5y&e7bhGt?PX;>K`-d+zjiKRe+kT*M$dYQ2{iN{Z=4X1^l> zbm~9o$G|q*M$iSy0m(?WHoq2b;}O1bBdZmUMRKHrgjP@4n5jata+zcfYlq-`Pd|Y@ zc%Hg_zSWDuOkc&a|9P;+79y~sx|-n)yVc5dt@P9}*4sc&`oRM#90gGVPr%^%7sh+1 zt+L11`wf>?DxRP0TC>jaS7-~B=nDA7yy5N)AxJrTKlWI+m3PVYT00BwQWw4K%~Sgv zeTO?%%RSvAbEuX6CuH2GjF-t#aA8A*J4C2AP>}g6Sh6+xVu!)R4H}&f^W4=g8B&;w z$MuObkZ9v(#y5>wVo3|%g~TkY?4N|&H*hSMfo>|@j?m|w5$h|=(q4Wn#EoFL6;NR( zf$MVh7iT}Oe#_#-zK4c?!!m+xLjmfg)#zI(s#86uQ_qTR;;oQEJjtXWY8Q<4uo3}= zmRG7uw|w3voqK>bEhvSFh!s^?dxd4Y?hmO+G#TI4=Gkj4Q^h00B4!0)a?A;q9o>Fo z#T_s_e{3jXs$Gp;8lwo;AX0o~)#+tj6co``6*bZMGJJ~HhBLW**0CLw8JmeL_bun^ zDa%}WS{tu8O;kZ0T%)P~3Z&2is^!34dGi(q+Ex*oT#T)+*6yAE;8m%3geH z!3-}Ax0uJTegv6}X&TIi(I8jI)vDzF=M1a^ z_dWMxU8EtIVqMa}@YR|1`gia7d*2n6cfwMHc&P}@48|UN;Ttfztc{9(W$e19WP?9? zsq{8^fU69+y~Lf6)x(!W_}S*Z`HQY6?@p+->D8K&X`mfNnZ+*1!neOfP#CsaYZ>YZ z7`2&z{(~Sl8&g+H3k`QyTSsfRAA63)z)V3K7{6r?*7}^Sc5IYrnZcov*c;u)Kb^Ci zeAJyQd3{QsM7YH8%8xO4y#03NW~5lRGNHB-25x?@M|NRQ-&w^eS|@29p#zKit_(cr z?Epu4UYXKX2Fb>;p4*5CZo|i0>dBWY;z0HZWB%u2@HaFv3J4>b@?mxvbgh+u4oP7i zNCN@%vZ|W4k(|DW!s$pvd{+ei6#s}aTvx?bET{i<)pd4X_Z9G0ENRSWZdAGf0Ni5w zkre#K5_cO*2TShXuJ?ZoaZ69hiAIpP>E^m0!*RM!8FM!e(~Sg(K@ZN@1ahE?MJe{!sck{I&jx zOs83|*I;+$;A~OEjg_xCfEd1Iu{V#7=6&D$I5117FI0q3sVRt2l2T3U!LA!qpjw@G z7_rwiYD2$D7HL-1l`u#{!&f}gV?eV z-^B@VHf<7GCtrGowK^1J`x5iGWayHGw3)!mm^bRG%b|f$mKe`|sc2hU z*0erPfLtupI8`)noG^H{i)FFbr*V25G3nabNdx(4 zy`Ph(aBzxHL}>7b=Bwos?uRnXV%M8T!vmL4-Hz+a8M%kwPxI-q&-e$gN7e>>J@>Xo zZqi?$_iGS&8_qkD%x2=m*o35J_@w(gc7cnRv+u2Dt#>3n0108u(v5PGi?pOMT8SG3 z)pWraGAd5}t8auv7TqYNWZTp|GfEX|armSgg!cjM+eRfW#W(yRWOEHA$mN2P{$kW| z@r#RtU9_aii|Di}A6}NoBpCjCT)8{UxEs$eUlJ2dNGsl|cN~T|_gvtLW#49!5RiQw(+xMB3p=PqQ{}|)T{Kxl4Si;X@qKT6{gLjBmllBDY*iuh*o;kOLV$HSP zJ;w0_gG@=rVsS-pk#s#Sl*>LG+`|1_J%0$JEJz;Pp0ZD>k=7d1dB;=$#)7&k(V>O#ApU)~MZ<%U1AT7sOcDTXATc z@un>kXKA`y&PDpZq*$8ZL{`q6SU9s$7Q3CbMh?5qulu%Skac7~tJGXXW`RAavB^xQ zW6|ctYx(sEcV6DKTz$<4j4%l%aq0B!h2%Eg4%@&^j!z=<9<1)BP6r7?h7@$<0U{tT z@s~G0JbuQ!%ZB;fj$*8lQlvhiDe($=3%A?vX3u?OU%Tf4sSR?P4>HbepF(K$2|dn# z!z8rIdxb)t_C!tou<0oh%c!m?rGiXQLU5T+7)NDpbPtS6C_22S`QZWGR9V_K)JiL% zpV#LT$8>W^{X|QUdsGXY$FQ#WO-kV^%jtliV7cpAK~C*c+hRTHIk|D;p&{iuN*iYu zGCJNJ1*=Hd`Rh^)ehUkwH?N|CiBgPc=-xbjoh%pG?Wc>Ms7J5U(khEiLw%>_+`r(I zp^FMXq}K{c)syOR=W!SF$MBElypNczp((9wFJhvTU9^u5`zF=QD*Iu_d+IK|!dB+l zp&##fRD6!LavtnVaBP?|v%EK=TwYrcA7PVg?yN~uSfnCacBR7)6{=$KzJL4l6V^TM z_7dGbLN`8}53tE^qjI;a*K@c>WO?9DZPRjkJGS)k5pj=7d2(KkwqpgQ>!dGbwLRtU zKfa}1|1vJLPs@r!aDTQQYRi|K231(wPIkAjy)UdAs=6tOsM8^Uye#+n9D>Gaz(NHz z+}yeKkx6I;JHEu!&ygp^ISpFAO4P^maT+o69`tNef+c&hg{Lah)M$mVMV(hwV4qvN zwJ0&i|2?&#a%c`YHoRE{wb-7nnNp3Se!HXDi>jub~E1!Wy>aG+e-<#2!4bux35Pg!= zH&e&BlA6Zl5jTc_LkZnne5%Kp1T6i%&x|~u!!{g1sfg9IZ%{+z(8J-4PqtewZmV2t z%ZpR>183oB`s1Y@rNc)_)Un6*p_|7xhm7X5=2zlbr>vj`7S$Zy$7edd71vEmcN~I}y1LFkPj+|D)q1~Gi!0Z5 zK9s36Rm>F<#R0!7gD}UXfc$C)8mS(2k?N8b-%;6%r1nnmxs0Z)UpjPD%9jN&s)SaL zj^fP;b|&DOscNb)r)nzVnVA*}Hs$j(=eQ(do@6m3od<$n!Gd3~U17-(e;@y1lqsXa zq0jqitGK;Q9u!s>q4}mrIC~OKrk){?x@kk7$=is_q>~x{I9?15Vys1eZ~d#m+SN2` zCkc+pTnMPkm-y&~^(%WBt|G}7PqTsOyR*UPafPJe!#Iz8=(#mFgf*9CZ1ju5K=-wfz zIH_SsMew^hl9Pn*c)2a(-`Wd$Ya$_?2%&G?846GeKaegZ0e!{SMOW4Pdn4nr+SSoj)f#XyV(XukqXtpQgF+tF(=w9N<=qZYn|115ss&Y z7*}CaLAkSvU!CcF>|WS5 zH)3E7yR9s>;|@^>yLj=#;aGk%-`zS(wccmMsQV9T;cfY0lKL=*d5>1^r+i9#QGF}# z&o+6Y8gxriJ>xW}VF%FK{l%V6*UH{CNsz%MZGXXalIDy7uroOB#R7QX5V8eXoEc15 z9L!Z~hDk&p3!QlK5qw$#@9qyX9I|Ne99RtxkM}Ks^;B5<`h=q7YP)w*BLe8*7Zc;~ zy*3_6;W$|FF@6L>g4F&R65MRv_VCKqviX;M5f3oDwnSxg1Sq~x{)aC=GCDUKOG|e* z?%%rK0@l~BRiY-r#PFT?b9z+%gcDH0!KBN{@I_3spvu^KN%^HL?fa^;^OIX5c+8pc z>p=T~K-Ikd)U6qXNTVz6Q)5>vc>~n^0(1rp3ElM4RSZ zR%WP_v(DY7ILMT?jC3c*eu_HZY7xtsm}~6DcCqKYq5xYdMSKY>x8kei;V~5ipZ=4i z6Bn@AWzn{v;J&&uBK)qszpE*pSIU9LDd?TLKp)U*zE_nG#O_0-oQOkEp8R2e2U*2=+gXWIhRnt>@#Z(d{c1{JS{na=pmB3ItxU`-QubXIf>Qsj^=l%g zYGz+CoJ9p5`L1=fQIjo2Ar-~bPddzgO)$?k?T>C;$(9q!g&3_D0kgTL->L|5v;0~a zUL5#HTH~9PfG;@Synua7Xr5XftCAe6s;TNUoZ6ohXc(93s0Vjlw^PFE=vo`ZB7(Bz zR*0^C7+Ig5UUSM3*O=ehE$fO+jxJRGMgp zf+q<|pR#Z=hqyX9yK$R4xmx~SS@~C%LQS1_tfXBKm=eAd^!-MUp}Bj!K;DaIhZSum zkr6Xe`;`PEYzhKXe$9D^a{H~M2B&nqRB`~KRft8(s2{=|%-~4@fNe6-Kr!VcrJti(?{ z%jFcwg_62Ub0nAQ3+|+0mOdbw0bLW)mYmS%F^F2r+_<*er}#i0M$nrT{qfTITX!}C z9yKdps*n50Z0ZhvNW`Y=#dSvqYYhD(S^4$$z(+m7P~dXo$@T|i^Di}0Qw1F2SEwf> zsE~sAuNX9ScK#QFDD3^-Qe!2Zroi|wb}-KA@n>j+XCU|iMP~9(4oghYVymoGa?LZ# z?z}9D*&2(g8vHWhwHdJtgPzMM%+NkCK8zmhguGgL2LfW!*xfvihmOw}`jP`OYq2kNy|sYKYR`9sIqN_J`q*J`u%I<{`HT7Q8zMvrh+16$ Q02lT1M;+X6>3-b(AMX;Yj{pDw literal 0 HcmV?d00001 diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 49a508dd22023..9c909398d2d88 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -373,6 +373,33 @@ def test_reader_converters(self): actual = self.get_exceldf(basename, 'Sheet1', converters=converters) tm.assert_frame_equal(actual, expected) + def test_reader_dtype(self): + # GH 8212 + basename = 'testdtype' + actual = self.get_exceldf(basename) + + expected = DataFrame({ + 'a': [1, 2, 3, 4], + 'b': [2.5, 3.5, 4.5, 5.5], + 'c': [1, 2, 3, 4], + 'd': [1.0, 2.0, np.nan, 4.0]}).reindex( + columns=['a', 'b', 'c', 'd']) + + tm.assert_frame_equal(actual, expected) + + actual = self.get_exceldf(basename, + dtype={'a': 'float64', + 'b': 'float32', + 'c': str}) + + expected['a'] = expected['a'].astype('float64') + expected['b'] = expected['b'].astype('float32') + expected['c'] = ['001', '002', '003', '004'] + tm.assert_frame_equal(actual, expected) + + with tm.assertRaises(ValueError): + actual = self.get_exceldf(basename, dtype={'d': 'int64'}) + def test_reading_all_sheets(self): # Test reading all sheetnames by setting sheetname to None, # Ensure a dict is returned. From 8d28b651c9a5faf51a7681b9f3b82877b3818fec Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 2 Dec 2016 18:07:12 -0600 Subject: [PATCH 2/2] doc fixup --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 38743a4f931a5..f22374553e9c3 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2546,7 +2546,7 @@ dtype Specifications As an alternative to converters, the type for an entire column can be specified using the `dtype` keyword, which takes a dictionary mapping column names to types. To interpret data with -no type inference, use the type `str` or `object`. +no type inference, use the type ``str`` or ``object``. .. code-block:: python