{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Classification counts for `sampling_nb`\n",
    "\n",
    "Loads `sampling_nb - sampling_nb.csv`, drops the `second` and `url` columns, removes rows whose `classification` is blank or one of the excluded labels, and then counts the remaining rows per `classification` and per (`classification`, `L2 class`) pair."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "model training 128\n",
       "model evaluation 91\n",
       "model deployment 75\n",
       "data cleaning 59\n",
       "model requirements 47\n",
       "feature engineering 36\n",
       "data collection 25\n",
       "Name: classification, dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "SAMPLES_CSV = 'sampling_nb - sampling_nb.csv'\n",
    "# Columns that are not used by any of the counts in this notebook.\n",
    "UNUSED_COLUMNS = ['second', 'url']\n",
    "# Rows carrying one of these `classification` labels are left out of the counts.\n",
    "EXCLUDED_LABELS = ['?', '', 'no pipeline', 'page not found', 'chinese', 'data labeling']\n",
    "\n",
    "samples_raw = pd.read_csv(SAMPLES_CSV)\n",
    "\n",
    "# read_csv parses empty fields as NaN by default, so the '' entry in\n",
    "# EXCLUDED_LABELS never matches on its own; blank labels are removed\n",
    "# explicitly with notna() so they do not linger in `data`.\n",
    "labels = samples_raw['classification']\n",
    "data = (\n",
    "    samples_raw\n",
    "    .loc[labels.notna() & ~labels.isin(EXCLUDED_LABELS)]\n",
    "    .drop(columns=UNUSED_COLUMNS)\n",
    ")\n",
    "\n",
    "# Number of remaining rows per classification label.\n",
    "data['classification'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Counts per `classification` / `L2 class` pair\n",
    "\n",
    "**NOTE (review):** in the saved output below, `ME-O` appears once under `model deployment` and `loss` appears under `model training`; neither matches the prefix pattern of its neighbours. These look like labeling slips in the CSV — confirm against the source data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "classification L2 class\n",
       "data cleaning DP-DF 8\n",
       " DP-LD 1\n",
       " DP-O 17\n",
       " DP-P 3\n",
       " DP-R 13\n",
       " DP-TE 9\n",
       " DP-TM 2\n",
       " DP-UT 6\n",
       "data collection DC-DC 13\n",
       " DC-DF 4\n",
       " DC-F 3\n",
       " DC-NS 1\n",
       " DC-O 1\n",
       " DC-S 3\n",
       "feature engineering FE-BC 8\n",
       " FE-CP 8\n",
       " FE-H 10\n",
       " FE-O 4\n",
       " FE-T 6\n",
       "model deployment MD-CI 44\n",
       " MD-LR 6\n",
       " MD-O 10\n",
       " MD-SM 14\n",
       " ME-O 1\n",
       "model evaluation ME-AR 30\n",
       " ME-C 29\n",
       " ME-O 20\n",
       " ME-RQ 8\n",
       " ME-TP 4\n",
       "model requirements MR-AM 18\n",
       " MR-FR 25\n",
       " MR-NM 2\n",
       " MR-O 2\n",
       "model training MT-BL 28\n",
       " MT-GPU 19\n",
       " MT-O 49\n",
       " MT-RU 10\n",
       " MT-TT 16\n",
       " loss 6\n",
       "dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows are grouped on both label columns; size() counts the rows in each group.\n",
    "data.groupby(['classification', 'L2 class']).size()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}