{ "cells": [ { "cell_type": "code", "execution_count": 24, "id": "e2daced3-45d5-40ba-a2d6-88b3d1c01326", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col1col2col3
00.00.00.0
10.00.00.0
20.00.00.0
30.00.00.0
40.00.00.0
\n", "
" ], "text/plain": [ " col1 col2 col3\n", "0 0.0 0.0 0.0\n", "1 0.0 0.0 0.0\n", "2 0.0 0.0 0.0\n", "3 0.0 0.0 0.0\n", "4 0.0 0.0 0.0" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 1. Using any method covered in the class, create a dataframe with 5 rows, and 3 columns named col1, col2, col3\n", "# filled with zero values. \n", "import pandas as pd\n", "import numpy as np\n", "\n", "df = pd.DataFrame(np.zeros((5,3)),columns=[f'col{x}' for x in [1,2,3]])\n", "df" ] }, { "cell_type": "code", "execution_count": 25, "id": "db4e18cb-44a8-43fc-bf1a-78c8604c0792", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateAMZNFB
02018-01-011.0000001.000000
12018-01-081.0618810.959968
22018-01-151.0532400.970243
32018-01-221.1406761.016858
42018-01-291.1633741.018357
............
1002019-12-021.4250611.075997
1012019-12-091.4326601.038855
1022019-12-161.4534551.104094
1032019-12-231.5212261.113728
1042019-12-301.5033601.098475
\n", "

105 rows × 3 columns

\n", "
" ], "text/plain": [ " date AMZN FB\n", "0 2018-01-01 1.000000 1.000000\n", "1 2018-01-08 1.061881 0.959968\n", "2 2018-01-15 1.053240 0.970243\n", "3 2018-01-22 1.140676 1.016858\n", "4 2018-01-29 1.163374 1.018357\n", ".. ... ... ...\n", "100 2019-12-02 1.425061 1.075997\n", "101 2019-12-09 1.432660 1.038855\n", "102 2019-12-16 1.453455 1.104094\n", "103 2019-12-23 1.521226 1.113728\n", "104 2019-12-30 1.503360 1.098475\n", "\n", "[105 rows x 3 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 2. read stocks.csv into a dataframe using only date, AMZN, and FB columns\n", "df=pd.read_csv('files/stocks.csv',usecols=['date','AMZN','FB'],parse_dates=['date'])\n", "df" ] }, { "cell_type": "code", "execution_count": 29, "id": "2d70e13c-f5bc-49d7-a994-4892cc55e4d4", "metadata": {}, "outputs": [], "source": [ "# 3. use pandas to convert Sample.xlsx, Sheet1 to a csv file, excluding the index\n", "df = pd.read_excel('files/Sample.xlsx',sheet_name='Sheet1')\n", "df.to_csv('files/convert_out.csv',index=False)" ] }, { "cell_type": "code", "execution_count": 15, "id": "addf673f-45ce-4b62-aacc-18877aabb5b8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateAMZNFB
92018-03-051.2845490.991330
102018-03-121.2786830.990581
162018-04-231.2794470.929034
172018-04-301.2862240.945197
182018-05-071.3040911.000749
............
1002019-12-021.4250611.075997
1012019-12-091.4326601.038855
1022019-12-161.4534551.104094
1032019-12-231.5212261.113728
1042019-12-301.5033601.098475
\n", "

88 rows × 3 columns

\n", "
" ], "text/plain": [ " date AMZN FB\n", "9 2018-03-05 1.284549 0.991330\n", "10 2018-03-12 1.278683 0.990581\n", "16 2018-04-23 1.279447 0.929034\n", "17 2018-04-30 1.286224 0.945197\n", "18 2018-05-07 1.304091 1.000749\n", ".. ... ... ...\n", "100 2019-12-02 1.425061 1.075997\n", "101 2019-12-09 1.432660 1.038855\n", "102 2019-12-16 1.453455 1.104094\n", "103 2019-12-23 1.521226 1.113728\n", "104 2019-12-30 1.503360 1.098475\n", "\n", "[88 rows x 3 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 4. Using the dataframe created in question 2, create another dataframe containing only rows where\n", "# the price of AMZN is greater than 1.25\n", "df2 = df[df.AMZN > 1.25]\n", "df2" ] }, { "cell_type": "code", "execution_count": 18, "id": "ded80d46-7411-47b5-8c2d-99cfa63abda4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateAMZNFB
92018-03-051.2845490.991330
102018-03-121.2786830.990581
162018-04-231.2794470.929034
172018-04-301.2862240.945197
182018-05-071.3040911.000749
192018-05-141.2808710.977683
202018-05-211.3099810.989671
212018-05-281.3355191.038212
222018-06-041.3700551.012042
232018-06-111.3960741.048167
242018-06-181.3958301.079690
252018-06-251.3829181.039979
262018-07-021.3917291.087664
272018-07-091.4750391.109553
282018-07-161.4755841.123575
292018-07-231.4784890.935991
302018-07-301.4833870.951458
312018-08-061.5346500.964731
322018-08-131.5313310.930158
332018-08-201.5501810.934707
342018-08-271.6374940.940487
352018-09-031.5881590.872572
362018-09-101.6029010.868718
372018-09-171.5580080.871983
382018-09-241.6295950.880171
392018-10-011.5373760.842012
402018-10-081.4551720.822799
412018-10-151.4351740.824458
422018-10-221.3365520.778004
432018-10-291.3550370.804656
442018-11-051.3931940.775809
452018-11-121.2963620.746749
472018-11-261.3750830.752529
482018-12-031.3254230.735456
492018-12-101.2951410.770993
522018-12-311.2817010.738293
\n", "
" ], "text/plain": [ " date AMZN FB\n", "9 2018-03-05 1.284549 0.991330\n", "10 2018-03-12 1.278683 0.990581\n", "16 2018-04-23 1.279447 0.929034\n", "17 2018-04-30 1.286224 0.945197\n", "18 2018-05-07 1.304091 1.000749\n", "19 2018-05-14 1.280871 0.977683\n", "20 2018-05-21 1.309981 0.989671\n", "21 2018-05-28 1.335519 1.038212\n", "22 2018-06-04 1.370055 1.012042\n", "23 2018-06-11 1.396074 1.048167\n", "24 2018-06-18 1.395830 1.079690\n", "25 2018-06-25 1.382918 1.039979\n", "26 2018-07-02 1.391729 1.087664\n", "27 2018-07-09 1.475039 1.109553\n", "28 2018-07-16 1.475584 1.123575\n", "29 2018-07-23 1.478489 0.935991\n", "30 2018-07-30 1.483387 0.951458\n", "31 2018-08-06 1.534650 0.964731\n", "32 2018-08-13 1.531331 0.930158\n", "33 2018-08-20 1.550181 0.934707\n", "34 2018-08-27 1.637494 0.940487\n", "35 2018-09-03 1.588159 0.872572\n", "36 2018-09-10 1.602901 0.868718\n", "37 2018-09-17 1.558008 0.871983\n", "38 2018-09-24 1.629595 0.880171\n", "39 2018-10-01 1.537376 0.842012\n", "40 2018-10-08 1.455172 0.822799\n", "41 2018-10-15 1.435174 0.824458\n", "42 2018-10-22 1.336552 0.778004\n", "43 2018-10-29 1.355037 0.804656\n", "44 2018-11-05 1.393194 0.775809\n", "45 2018-11-12 1.296362 0.746749\n", "47 2018-11-26 1.375083 0.752529\n", "48 2018-12-03 1.325423 0.735456\n", "49 2018-12-10 1.295141 0.770993\n", "52 2018-12-31 1.281701 0.738293" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Using the dataframe created in question 4, display rows where AMZN is greater than FB and date is before 2019\n", "# hint: the column needs to be date type and use datetime module to compare against\n", "\n", "import datetime as dt\n", "df2[df2.date < dt.datetime(2019,1,1)]" ] }, { "cell_type": "code", "execution_count": null, "id": "f59d8ce6-2949-4199-9a3a-a2b6ab9e74e4", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 }