<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>어깨빠진보부상</title>
    <link>https://bbgw-oshoulder.tistory.com/</link>
    <description>.</description>
    <language>ko</language>
    <pubDate>Tue, 2 Jun 2026 08:01:46 +0900</pubDate>
    <generator>TISTORY</generator>
    <ttl>100</ttl>
    <managingEditor>가라어퍼</managingEditor>
    <image>
      <title>어깨빠진보부상</title>
      <url>https://tistory1.daumcdn.net/tistory/8661080/attach/34ee3ad9770445b6ad3b8c0523875cf0</url>
      <link>https://bbgw-oshoulder.tistory.com</link>
    </image>
    <item>
      <title>8주차 Note: 딥러닝</title>
      <link>https://bbgw-oshoulder.tistory.com/8</link>
      <description>&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;KakaoTalk_20260531_232908822.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bkemlE/dJMcahq6V7n/PCBk1JTeZ8rkMVX2sRYSNk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bkemlE/dJMcahq6V7n/PCBk1JTeZ8rkMVX2sRYSNk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bkemlE/dJMcahq6V7n/PCBk1JTeZ8rkMVX2sRYSNk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbkemlE%2FdJMcahq6V7n%2FPCBk1JTeZ8rkMVX2sRYSNk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;493&quot; height=&quot;263&quot; data-filename=&quot;KakaoTalk_20260531_232908822.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;8주차: 5월 25일 ~ 5월 29일&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;멀티캠퍼스&lt;/span&gt; 부트캠프 8주차 요약✍️&lt;/span&gt;&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/25 ] &lt;span style=&quot;color: #fe6b00;&quot;&gt;대체공휴일 휴강&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/26 ]&amp;nbsp;&lt;b&gt;딥러닝&lt;/b&gt;: RNN, LSTM_회귀&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/27 ] &lt;b&gt;딥러닝&lt;/b&gt;: LSTM_분류&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/28 ] &lt;b&gt;투자 전략&lt;/b&gt;: Bollinger Band, BuyandHold&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/29 ] &lt;b&gt;투자 전략&lt;/b&gt;: Momentum&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;5월 26일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의 소감&lt;/b&gt;: 틀리면서 배우는 딥~러~닝~&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;학습률 0.001을 0.01로 바꾸니까 MSE 진짜 난리나던데 하... &lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;0 하나를 못봐서 점심시간 내내 코드를 보고 있었다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  RNN&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;AAPL(애플 주식) 데이터 활용&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780201089168&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd 
import matplotlib.pyplot as plt
import torch 
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score

df = pd.read_csv(&quot;../csv/aapl.csv&quot;)
df.info()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;780&quot; data-origin-height=&quot;280&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bImocD/dJMcabRSPdR/Z7KdyvFWswVdY0FRpNy4F1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bImocD/dJMcabRSPdR/Z7KdyvFWswVdY0FRpNy4F1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bImocD/dJMcabRSPdR/Z7KdyvFWswVdY0FRpNy4F1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbImocD%2FdJMcabRSPdR%2FZ7KdyvFWswVdY0FRpNy4F1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;535&quot; height=&quot;192&quot; data-origin-width=&quot;780&quot; data-origin-height=&quot;280&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 전처리&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780201165841&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df.dropna(inplace=True)

# Keep only the date and the adjusted close price.
df = df[['Date', 'Adj Close']]
# Selecting with a list of columns yields a 2-D array of shape (N, 1), which is
# the layout the scaler is fitted on below, so no extra reshape(-1, 1) is needed.
values = df[['Adj Close']].values

split_idx = int(len(values) * 0.75)

train_data = values[:split_idx]
test_data = values[split_idx:]

# MinMaxScaler로 데이터 정규화
scaler = MinMaxScaler()
train_sc = scaler.fit_transform(train_data)
test_sc = scaler.transform(test_data)

# 스케일링이 완료된 데이터를 Tensor로 변환 
train_sc = torch.tensor(train_sc, dtype=torch.float32)
test_sc = torch.tensor(test_sc, dtype=torch.float32)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;Dataset 생성, DataLoader 사용을 통한 train_dl/test_dl 생성&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780201473879&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;class WindowDataset(Dataset):
    def __init__(self, _data, _window):
        # _data : tensor, array 1차원 데이터 형태
        # _window : 구간의 크기 
        self.data = _data
        self.window = _window
        # DataLoader에서 사용 가능한 인덱스의 최대 값
        self.n = len(_data) - _window

    def __len__(self):
        return self.n
    
    def __getitem__(self, idx):
        # idx : 0 ~ self.n-1 사이의 정수가 대입 (DataLoader에서 자동으로 대입)
        x = self.data[idx:idx + self.window]
        y = self.data[idx + self.window]
        return x, y
 
# WindowDataset에 데이터를 대입 
train_ds = WindowDataset(train_sc, _window=60)
test_ds = WindowDataset(test_sc, _window=60)

# Dataset를 DataLoader로 생성 
train_dl = DataLoader(train_ds, batch_size=128, shuffle = True, drop_last = True)
test_dl = DataLoader(test_ds, batch_size=256, shuffle=False, drop_last = False)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;RNN Model, 검증 데이터 평가 함수 생성&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;RNN Model&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780201664205&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# RNN 모델 정의 
class RNNModel(nn.Module):
    def __init__(self,
                 input_size, 
                 hidden_size = 64, 
                 num_layers = 1, 
                 dropout = 0.0, 
                 nonlinearity = 'tanh', 
                 bidirectional = False):
        """Build the recurrent layer and the linear head that emits one value.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the RNN hidden state.
            num_layers: Number of stacked RNN layers.
            dropout: Dropout value forwarded to nn.RNN.
            nonlinearity: Activation name forwarded to nn.RNN.
            bidirectional: Whether nn.RNN also runs over the sequence in reverse.
        """
        super(RNNModel, self).__init__()
        # Python 3 shorthand for the call above: super().__init__()
        self.rnn = nn.RNN(
            input_size = input_size,
            hidden_size = hidden_size, 
            num_layers = num_layers, 
            dropout = dropout, 
            nonlinearity = nonlinearity, 
            bidirectional = bidirectional, 
            # With batch_first=False the input layout is (window, batch, features);
            # True switches it to (batch, window, features).
            batch_first = True
        )

        # A bidirectional RNN also emits the backward direction, so the
        # feature width handed to the linear head is doubled.
        if bidirectional:
            hidden_size *= 2
        print(f&quot;hidden_size : {hidden_size}&quot;)
        # Linear head mapping the recurrent features to a single output value.
        self.model = nn.Linear(hidden_size, 1)
        
        
    def forward(self, x):
        out, h_n = self.rnn(x)
        last_hidden = h_n[-1]
        result = self.model(last_hidden)
        return result&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1780201697765&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 모델 학습 시 검증 데이터를 이용하여 모델의 성능을 평가 할수 있도록 검증 데이터 평가 함수 

@torch.no_grad()
def evaluate_mse(dataloader, model):
    """Return the sample-weighted mean squared error of model over dataloader.

    Args:
        dataloader: Iterable of (x, y) batches from the validation split.
        model: Module called as model(x) to obtain predictions.

    Returns:
        Sum of per-batch MSE values weighted by batch size, divided by the
        number of samples seen (or by 1 when the loader yields nothing).
    """
    # Switch to evaluation mode; the mode is not switched back afterwards.
    model.eval()
    total_loss = 0
    total_n = 0
    for x, y in dataloader:
        x = x.float()
        y = y.float()
        pred = model(x)
        loss = nn.MSELoss()(pred, y)
        # loss is a batch mean, so scale it by the batch size before summing.
        total_loss += loss.item() * x.size(0)
        total_n += x.size(0)
    # max(..., 1) avoids a division by zero for an empty loader.
    return total_loss / max(total_n, 1)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;RNN Model 호출, 평가 지표, optimizer 생성&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780201773631&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;aapl_model = RNNModel(
    input_size = 1, 
    hidden_size = 128
)

criterion = nn.MSELoss()
optimizer = optim.Adam(aapl_model.parameters(), lr = 0.001)		# 문제의 0.01과 0.001...&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1780201755501&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;train_history, test_history = [], []
# Train for 20 epochs and record the sample-weighted MSE of both splits per epoch.
for epoch in range(20):
    aapl_model.train()
    # running: batch losses weighted by batch size; n_seen: samples processed so far.
    running, n_seen = 0.0, 0
    for x, y in train_dl:
        x = x.float()
        y = y.float()
        pred = aapl_model(x)
        loss = criterion(pred, y)
        optimizer.zero_grad()
        loss.backward()
        # Clip the gradient norm to 1.0 so a single update cannot make the weights diverge.
        nn.utils.clip_grad_norm_(aapl_model.parameters(), 1.0)
        optimizer.step()

        running += loss.item() * y.size(0)
        n_seen += y.size(0)
        
    train_mse = running / n_seen
    test_mse = evaluate_mse(test_dl, aapl_model)
    
    train_history.append(train_mse)
    test_history.append(test_mse)
    print(f&quot;Epoch {epoch+1} Train MSE: {round(train_mse, 8)} Test MSE: {round(test_mse, 8)}&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;852&quot; data-origin-height=&quot;393&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/U6O0R/dJMcadoBbKG/fHTeSkqvv5q3Gcf2ApsKk0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/U6O0R/dJMcadoBbKG/fHTeSkqvv5q3Gcf2ApsKk0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/U6O0R/dJMcadoBbKG/fHTeSkqvv5q3Gcf2ApsKk0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FU6O0R%2FdJMcadoBbKG%2FfHTeSkqvv5q3Gcf2ApsKk0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;852&quot; height=&quot;393&quot; data-origin-width=&quot;852&quot; data-origin-height=&quot;393&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;시각화&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780201918284&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;aapl_model.eval()		# 평가 모드

preds = []
trues = []

with torch.no_grad():
    for x, y in test_dl:
        x = x.float()
        y = y.float()

        pred = aapl_model(x)
        preds.append(pred.cpu())
        trues.append(y.cpu())
        

# cat 함수: pd.concat() 과 같은 역할
preds = torch.cat(preds, dim=0).squeeze(-1).numpy()
trues = torch.cat(trues, dim=0).squeeze(-1).numpy()


# 시각화
plt.figure(figsize = (14, 10))
plt.plot(preds[:600], label = 'Preds')
plt.plot(trues[:600], label = 'Trues')
plt.legend()
plt.grid()
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리61.png&quot; data-origin-width=&quot;1136&quot; data-origin-height=&quot;813&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/qHDXq/dJMcada4b62/dKaPAaarrDf3Fa5hzrtWKK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/qHDXq/dJMcada4b62/dKaPAaarrDf3Fa5hzrtWKK/img.png&quot; data-alt=&quot;성능 뭐야 대박&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/qHDXq/dJMcada4b62/dKaPAaarrDf3Fa5hzrtWKK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqHDXq%2FdJMcada4b62%2FdKaPAaarrDf3Fa5hzrtWKK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1136&quot; height=&quot;813&quot; data-filename=&quot;티스토리61.png&quot; data-origin-width=&quot;1136&quot; data-origin-height=&quot;813&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;성능 뭐야 대박&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  LSTM&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;AAPL 데이터 활용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;코드는 RNN과 매우 유사하지만, nn.RNN이 (out, h_n) 2개의 값을 돌려주는 것과 달리 nn.LSTM은 (out, (h_n, c_n))으로 셀 상태 c_n까지 총 3개의 값을 돌려준다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;또한 아래에서 정의하는 LSTMModel 클래스에는 head_type이라는 인자를 추가한다. 이는 nn.LSTM 자체의 인자가 아니라 직접 만든 인자로, 선형층에 은닉 상태(h)와 셀 상태(c) 중 무엇을 넘길지 선택한다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780202087808&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;함수화를 위한 작업&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780204117987&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# Settings gathered in one place so the pipeline can later be wrapped in a function.
# Each variable below is a candidate parameter of that function.

# Path of the CSV file handed to read_csv
file_path = '../csv/aapl.csv'

# Window length used by the Dataset
window = 60

# Batch sizes for the DataLoaders: x_batch for the training loader, y_batch for the test loader
x_batch = 64
y_batch = 256

# Learning rate passed to the optimizer
lr = 0.001

# Number of training epochs
epochs = 20

# Number of hidden features in the recurrent layer
hidden_cnt = 64

# Number of stacked layers
# NOTE(review): not passed to LSTMModel in the listings shown — confirm whether it should be
layer_cnt = 1

# Fraction of rows used for training; the rest is the test split
train_ratio = 0.8

# Which final LSTM state feeds the linear head:
# 'h': hidden state // 'c': cell state // 'h_c': both concatenated
head_type = 'h_c'&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;데이터 로드, 전처리&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780204192567&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df = pd.read_csv(file_path)
df.dropna(inplace=True)

df['Date'] = pd.to_datetime(df['Date'])
X = df[['Adj Close', 'Volume']].astype(float).values
y = df[['Adj Close']].astype(float).values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size = train_ratio, shuffle = False
)

scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train_sc = scaler_x.fit_transform(X_train)
X_test_sc = scaler_x.transform(X_test)
y_train_sc = scaler_y.fit_transform(y_train)
y_test_sc = scaler_y.transform(y_test)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;Dataset 생성, DataLoader 사용을 통한 train_dl/test_dl 생성&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780204598726&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;class WindowDataset(Dataset):
    def __init__(self, _x, _y, _window):
        self.x = _x
        self.y = _y
        self.window = _window
        self.n = (len(_x) - _window)
    
    def __len__(self):
        return max(self.n, 1)
    
    def __getitem__(self, idx):
        x = self.x[idx  :  idx + self.window]
        y = self.y[idx + self.window]
        x_tensor = torch.tensor(x, dtype = torch.float32)
        y_tensor = torch.tensor(y, dtype = torch.float32)
        return x_tensor, y_tensor

train_ds = WindowDataset(X_train_sc, y_train_sc, window)
test_ds = WindowDataset(X_test_sc, y_test_sc, window)

train_dl = DataLoader(train_ds, shuffle = True, drop_last = True, batch_size = x_batch)
test_dl = DataLoader(test_ds, shuffle = False, drop_last=False, batch_size = y_batch)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;LSTM 정의&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780205223218&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;class LSTMModel(nn.Module):
    
    def __init__(
            self,
            input_size,
            hidden_size = 64,
            num_layers = 1,
            dropout = 0.0,
            bidirectional = False,
            batch_first = True,
            head_type = 'h'
    ):
        super(LSTMModel, self).__init__()

        # LSTM 기본 설정
        self.lstm = nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            dropout = dropout,
            bidirectional = bidirectional,
            batch_first = batch_first
        )

        # bidirectional이 True면 out feature의 개수가 2배
        hidden_size = hidden_size * (2 if bidirectional else 1)

        # head_type에 따라서 출력의 feature 개수가 달라진다.
        if head_type in ['h', 'c']:
        #if (head_type == 'h') | (head_type == 'c')
            pass
        elif head_type == 'h_c':
            hidden_size = hidden_size * 2
        else:
            print('head_type의 인자값을 잘못 입력하였습니다. (&quot;h&quot;, &quot;c&quot;, &quot;h_c&quot;)를 사용해주세요')
        
        # 선형 모델 생성
        self.model = nn.Linear(hidden_size, 1)
            # 객체 안에 독립적인 데이터를 저장
        self.head_type = head_type

    # 순전파 함수 정의
    def forward(self, x):
        # 순전파의 예측값 (RNN은 2개를 되돌려줌, LSTM은 3개를 되돌려줌)
        out, (h_n, c_n) = self.lstm(x)
        # 은닉층의 가장 마지막값 저장
        h_last = h_n[-1]
        c_last = c_n[-1]

        if self.head_type == 'h':
            feat = h_last
        elif self.head_type == 'c':
            feat = c_last
        elif self.head_type == 'h_c':
            feat = torch.cat([h_last, c_last], dim = -1)
        
        return self.model(feat)&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;여기서 __init__ 함수에 지정한 값들은 모델을 생성할 때 해당 인자를 생략하면 사용되는 기본값이다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;모델 생성&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780205300399&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;model = LSTMModel(input_size = 2, hidden_size = hidden_cnt, head_type = head_type)
model&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;840&quot; data-origin-height=&quot;90&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/WRk3x/dJMcacpM8Yf/yseCRYgAL6DQFeH33vMP11/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/WRk3x/dJMcacpM8Yf/yseCRYgAL6DQFeH33vMP11/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/WRk3x/dJMcacpM8Yf/yseCRYgAL6DQFeH33vMP11/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FWRk3x%2FdJMcacpM8Yf%2FyseCRYgAL6DQFeH33vMP11%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;840&quot; height=&quot;90&quot; data-origin-width=&quot;840&quot; data-origin-height=&quot;90&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;손실 함수, 옵티마이저 정의&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780205370015&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = lr)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;평가 함수 생성&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780205431445&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 검증 데이터의 loss값 확인하는 함수

# @torch.no_grad()
def evaluate_mse(dl):
    """Return the sample-weighted mean loss of the module-level model over dl.

    Uses the module-level `model` and `criterion`.

    Args:
        dl: Iterable of (x, y) batches.

    Returns:
        Sum of per-batch losses weighted by batch size, divided by the number
        of samples seen (or by 1 when the loader yields nothing).
    """
    # Switch to evaluation mode; the caller switches back with model.train().
    model.eval()

    total_loss, total_n = 0.0, 0
    # Gradients are disabled here; the @torch.no_grad() decorator would do the same.
    with torch.no_grad():
        for x, y in dl:
            x = x.float()
            y = y.float()
            pred = model(x)
            loss = criterion(pred, y)
            # Weight the batch loss by the batch size before summing.
            total_loss += loss.item() * y.size(0)
            total_n += y.size(0)
    
    # max(..., 1) avoids a division by zero for an empty loader.
    return total_loss / max(total_n, 1)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;반복 학습&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780205449263&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# Training loop

# Test loss after each epoch, plotted later.
test_history = []

for epoch in range(epochs):
    model.train()

    # Batch losses weighted by batch size, and the number of samples seen.
    total_loss, total_n = 0.0, 0
    
    for x, y in train_dl:
        x = x.float()
        y = y.float()

        pred = model(x)
        loss = criterion(pred, y)

        optimizer.zero_grad()
        loss.backward()
        # Gradient clipping only has an effect here, between backward() and step().
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        total_loss += loss.item() * y.size(0)
        total_n += y.size(0)
    
    train_loss = total_loss / max(total_n, 1)
    test_loss = evaluate_mse(test_dl)
    test_history.append(test_loss)
    print(f&quot;Epoch: {epoch+1}, train_mse: {round(train_loss, 4)}, test_mse: {round(test_loss, 4)}&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;843&quot; data-origin-height=&quot;395&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/5WBaY/dJMcaaMeEEA/N7zgA7u2m3fco6fDdzPoJ0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/5WBaY/dJMcaaMeEEA/N7zgA7u2m3fco6fDdzPoJ0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/5WBaY/dJMcaaMeEEA/N7zgA7u2m3fco6fDdzPoJ0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F5WBaY%2FdJMcaaMeEEA%2FN7zgA7u2m3fco6fDdzPoJ0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;843&quot; height=&quot;395&quot; data-origin-width=&quot;843&quot; data-origin-height=&quot;395&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;간단 시각화&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780205482618&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# test_history를 그래프로 시각화

plt.plot(test_history)
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리62.png&quot; data-origin-width=&quot;556&quot; data-origin-height=&quot;413&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bIko59/dJMcahq6JAB/7XKuh43aqoR8HTqfcmR7y0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bIko59/dJMcahq6JAB/7XKuh43aqoR8HTqfcmR7y0/img.png&quot; data-alt=&quot;끝에서 mse 값이 상승하는 것은 과적합으로 볼 수 있다.&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bIko59/dJMcahq6JAB/7XKuh43aqoR8HTqfcmR7y0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbIko59%2FdJMcahq6JAB%2F7XKuh43aqoR8HTqfcmR7y0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;319&quot; height=&quot;237&quot; data-filename=&quot;티스토리62.png&quot; data-origin-width=&quot;556&quot; data-origin-height=&quot;413&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;끝에서 mse 값이 상승하는 것은 과적합으로 볼 수 있다.&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;예측값 생성&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780205556607&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;model.eval()

# Per-batch predictions and targets from the test loader.
preds, trues = [], []

with torch.no_grad():
    for x, y in test_dl:
        x = x.float()
        pred = model(x)
        preds.append(pred)
        trues.append(y)

# Join the per-batch tensors along the batch axis and convert them to arrays.
# They stay 2-D here because inverse_transform below is given 2-D input.
preds = torch.cat(preds, dim=0).numpy()
trues = torch.cat(trues, dim=0).numpy()

# Undo the target scaling, then drop the trailing axis.
pred_origin = scaler_y.inverse_transform(preds).squeeze(-1)
true_origin = scaler_y.inverse_transform(trues).squeeze(-1)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;시각화&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780206983538&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.figure(figsize = (16, 10))
plt.plot(pred_origin, label = 'Pred Origin')
plt.plot(true_origin, label = 'True Origin')

plt.legend()
plt.grid()
plt.xlabel('Date')
plt.ylabel('Adj Close')

plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리63.png&quot; data-origin-width=&quot;1315&quot; data-origin-height=&quot;833&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/VjKxE/dJMcajh6q5e/kKnAIdWCfalZ5nK3StMdkK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/VjKxE/dJMcajh6q5e/kKnAIdWCfalZ5nK3StMdkK/img.png&quot; data-alt=&quot;성능 대박&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/VjKxE/dJMcajh6q5e/kKnAIdWCfalZ5nK3StMdkK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FVjKxE%2FdJMcajh6q5e%2FkKnAIdWCfalZ5nK3StMdkK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1315&quot; height=&quot;833&quot; data-filename=&quot;티스토리63.png&quot; data-origin-width=&quot;1315&quot; data-origin-height=&quot;833&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;성능 대박&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;5월 27일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의 소감&lt;/b&gt;: 시간이 순식간에 흘러간다... 분명 한 건 많지 않은 것 같은데 뇌에 과부하가 걸려서 진행을 못 하는 느낌 &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  LSTM_분류&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;날씨 데이터 활용&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780210184085&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import numpy as np
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import platform

from sklearn.metrics import accuracy_score
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler

---

url = &quot;https://archive-api.open-meteo.com/v1/archive&quot;

params = {
    'latitude': 37.5665,
    'longitude': 126.9780,
    'start_date': '2025-01-01',
    'end_date': '2025-12-31',
    'hourly': 'temperature_2m,relative_humidity_2m,surface_pressure,wind_speed_10m,rain'
}

res = requests.get(url, params = params)
data = res.json()


# 날씨 데이터들을 따로 추출
df = pd.DataFrame(data['hourly'])
df.columns = ['측정 시간', '기온', '상대 습도', '지면 기압', '풍속', '강수량']&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;데이터 전처리&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780212514659&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;x = df[['기온', '상대 습도', '지면 기압', '풍속']].values
# 강수 여부(target) 컬럼 생성: 강수량이 0보다 크면 1(비), 아니면 0(맑음)
df['target'] = df['강수량'].gt(0).astype(int)
y = df['target'].values

x_sc = MinMaxScaler().fit_transform(x)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;Dataset 생성, DataLoader 사용 train_dl 생성&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780212568083&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;class windowDS(Dataset):
    def __init__(self, _x, _y, _window):
        self.x = _x
        self.y = _y
        self.window = _window
        self.n = len(_x) - _window
    
    def __len__(self):
        return self.n

    def __getitem__(self, idx):
        x = self.x[ idx : idx + self.window ]
        y = self.y[ idx + self.window ]
        x_tensor = torch.tensor(x).float()
        y_tensor = torch.tensor(y).long()

        return x_tensor, y_tensor

window = 72
train_ds = windowDS(x_sc, y, window)
train_dl = DataLoader(train_ds, batch_size = 64, shuffle=False)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;LSTM 모델 생성&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780212615167&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;class LSTMclf(nn.Module):
    def __init__(
            self,
            input_size,
            hidden_size
    ):
        super(LSTMclf, self).__init__()
        self.lstm = nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_size,
            batch_first = True
        )
        self.model = nn.Linear(hidden_size, 2)
    
    def forward(self, x):
        out, (h_n, c_n) = self.lstm(x)
        # h_n[-1], c_n[-1]: bidirectional이 False인 경우 사용 가능
            # True인 경우 역방향의 아웃풋 피쳐만 불러온다.
        # out: 3차원 데이터셋 &amp;rarr; [batch, window, feature]
        # 2차원 데이터셋의 형태로 변경 &amp;rarr; out[:, -1, :] &amp;rarr; [batch, feature]
        pred = self.model(out[:, -1, :])
        return pred&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;모델 정의, 손실 함수, 옵티마이저 생성&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780212661572&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;model = LSTMclf(input_size = x.shape[1], hidden_size=32)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;반복 학습&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780212713598&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;model.train()

for epoch in range(20):
    total_loss, total_n = 0.0, 0
    current1 = current2 = 0
    acc_list = []
    
    for x, y in train_dl:
        res = model(x)
        loss = criterion(res, y)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        total_loss += loss.item()
        total_n += y.size(0)
        _, pred = torch.max(res, 1)
        acc = accuracy_score(y.numpy(), pred.numpy())
        acc_list.append(acc)

        current1 += (y == pred).sum().item()
    
    train_loss = total_loss / len(train_dl)
    mean_current = (current1 / max(total_n, 1)) * 100
    current2 = np.mean(acc_list)

    print(f'Epoch: {epoch+1}, EntropyLoss: {round(train_loss, 3)}, \
    Acc1: {round(mean_current, 3)}, Acc2: {round(current2, 3)}')&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;842&quot; data-origin-height=&quot;393&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/rPVrK/dJMcaaFw8gY/0cbSPnvMe6T0pfKblZ5C81/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/rPVrK/dJMcaaFw8gY/0cbSPnvMe6T0pfKblZ5C81/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/rPVrK/dJMcaaFw8gY/0cbSPnvMe6T0pfKblZ5C81/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FrPVrK%2FdJMcaaFw8gY%2F0cbSPnvMe6T0pfKblZ5C81%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;842&quot; height=&quot;393&quot; data-origin-width=&quot;842&quot; data-origin-height=&quot;393&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;예측&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780212792017&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;model.eval()

eval_loader = DataLoader(train_ds, batch_size = 1000, shuffle=False)

# 첫번째 데이터만 로드
x, y = next(iter(eval_loader))

with torch.no_grad():
    test_out = model(x)
    _, test_pred = torch.max(test_out, 1)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;시각화&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780212982387&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 한글 인코딩을 위해
if platform.system() == 'Windows':
    plt.rc('font', family = 'Malgun Gothic')

plt.rcParams['axes.unicode_minus'] = False

# 시각화
plt.figure(figsize=(15, 5))
plt.plot(y.numpy(), label = '실제 강수 여부', color = 'gray', drawstyle = 'steps-mid', alpha = 0.5)
plt.plot(test_pred.numpy(), label = 'LSTM의 결과', color = 'blue', linestyle = '--', drawstyle = 'steps-mid', alpha=0.7)
plt.yticks([0, 1], ['맑음(0)', '비(1)'])
plt.legend()
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리64.png&quot; data-origin-width=&quot;1236&quot; data-origin-height=&quot;427&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/brzlCj/dJMcaaern7R/Lkouz94p1PsD8ZpaKnSIUk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/brzlCj/dJMcaaern7R/Lkouz94p1PsD8ZpaKnSIUk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/brzlCj/dJMcaaern7R/Lkouz94p1PsD8ZpaKnSIUk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbrzlCj%2FdJMcaaern7R%2FLkouz94p1PsD8ZpaKnSIUk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1236&quot; height=&quot;427&quot; data-filename=&quot;티스토리64.png&quot; data-origin-width=&quot;1236&quot; data-origin-height=&quot;427&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;5월 28일&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의 소감&lt;/b&gt;: 클래스 지옥 &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  Bollinger Band&lt;/span&gt;&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;Amazon 주식 데이터(AMZN) 활용&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780213932898&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

df = pd.read_csv('../csv/AMZN.csv', index_col = 'Date')
df.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;843&quot; data-origin-height=&quot;202&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bhDUvg/dJMcahR6OhO/mtDpZMUV0IcgKCqw8NHg0K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bhDUvg/dJMcahR6OhO/mtDpZMUV0IcgKCqw8NHg0K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bhDUvg/dJMcahR6OhO/mtDpZMUV0IcgKCqw8NHg0K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbhDUvg%2FdJMcahR6OhO%2FmtDpZMUV0IcgKCqw8NHg0K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;843&quot; height=&quot;202&quot; data-origin-width=&quot;843&quot; data-origin-height=&quot;202&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 전처리, 파생 변수 생성&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780214097424&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;flag = df.isin( [np.nan, np.inf, -np.inf] ).any(axis = 1)
df = df.loc[~flag, ]

# 종가 이외의 데이터를 제외
df = df[['Adj Close']]

# 이동평균선 &amp;rarr; rolling
df['center'] = df['Adj Close'].rolling(20).mean()

# 상단 밴드, 하단 밴드 생성
std_value = 2 * df['Adj Close'].rolling(20).std()
df['ub'] = df['center'] + std_value
df['lb'] = df['center'] - std_value

# index 값을 시계열로 변경
df.index = pd.to_datetime(df.index)

# 투자 시작 시간 설정
start = '2010-01-01'
test_df = df.loc[start: , ]
test_df.head()&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;보유 내역 추가&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780214317988&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 구매 상태를 입력할 수 있는 공간 생성

test_df['trade'] = ''
test_df.head()&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;상단 밴드보다 수정 주가가 높거나 같은 경우인데 현재 보유 중: 매도&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;상단 밴드보다 수정 주가가 높거나 같은 경우인데 보유 중 아님: 유지&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;하단 밴드보다 수정 주가가 낮거나 같은 경우인데 현재 보유 중: 유지&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;하단 밴드보다 수정 주가가 낮거나 같은 경우인데 보유 중 아님: 매수&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;수정 주가가 밴드 사이에 존재하는 경우인데 현재 보유 중: 유지&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt; 수정 주가가 밴드 사이에 존재하는 경우인데 보유 중 아님: 유지&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1780214337229&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;for i in test_df.index:

    # 상단 밴드보다 수정 종가가 높거나 같은 경우
    if test_df.loc[i, 'Adj Close'] &amp;gt;= test_df.loc[i, 'ub']:
        # 보유 중이라면 trade = '', 아니라면 trade = ''
        test_df.loc[i, 'trade'] = ''

    # 하단 밴드보다 수정 종가가 낮거나 같은 경우
    elif test_df.loc[i, 'Adj Close'] &amp;lt;= test_df.loc[i, 'lb']:
        # 보유 중이라면 trade = 'buy', 아니라면 trade = 'buy'
        test_df.loc[i, 'trade'] = 'buy'
    
    else:
        # 보유 중인 경우 trade = 'buy', 보유 중이 아니면 trade = ''
        # 전날의 trade가 buy인 경우: 보유 중
        if test_df.shift().loc[i, 'trade'] == 'buy':
            test_df.loc[i, 'trade'] = 'buy'
        else:
            test_df.loc[i, 'trade'] = ''&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;수익률 계산&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780214511193&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 수익률 column 생성, 1로 채워준다

test_df['rtn'] = 1.0&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1780214533451&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;for i in test_df.index:
    # 매수 가격 형성
    if (test_df.shift().loc[i, 'trade'] == '') &amp;amp; (test_df.loc[i, 'trade'] == 'buy'):
        buy = test_df.loc[i, 'Adj Close']
        print(f'매수일: {i}, 매수가: {buy}')

    # 매도 가격 형성
    elif (test_df.shift().loc[i, 'trade'] == 'buy') &amp;amp; (test_df.loc[i, 'trade'] == ''):
        sell = test_df.loc[i, 'Adj Close']

        # 수익률 계산
        rtn = sell / buy
        test_df.loc[i, 'rtn'] = rtn
        print(f'매도일: {i}, 매도가: {sell}, 수익률: {rtn}')&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;843&quot; data-origin-height=&quot;608&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/FXXsl/dJMb99NiGvj/bCQxkG8LecKTiU9QFRkQr1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/FXXsl/dJMb99NiGvj/bCQxkG8LecKTiU9QFRkQr1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/FXXsl/dJMb99NiGvj/bCQxkG8LecKTiU9QFRkQr1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FFXXsl%2FdJMb99NiGvj%2FbCQxkG8LecKTiU9QFRkQr1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;530&quot; height=&quot;608&quot; data-origin-width=&quot;843&quot; data-origin-height=&quot;608&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;누적 수익률 계산 &amp;rarr; rtn 누적곱&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780214607611&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;acc_rtn = 1.0

for i in test_df.index:
    rtn = test_df.loc[i, 'rtn']
    acc_rtn *= rtn

test_df['acc_rtn'] = test_df['rtn'].cumprod()
test_df.iloc[-1, -1]
	# np.float64(3.138061358619031)&lt;/code&gt;&lt;/pre&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&amp;nbsp;&lt;/h3&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;번외: Buy and Hold&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;계산이 간단해서 제목을 따로 사용하진 않은 Buy and Hold...&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1780214861597&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 투자 기간 첫날 &amp;rarr; 매수가
# 투자 마지막 날 &amp;rarr; 매도가

bnh_rtn = test_df.iloc[-1, 0] / test_df.iloc[0, 0]
print(bnh_rtn)
	# 14.249095911087196&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&amp;nbsp;  함수화&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;지금까지 만든 것들을 매개변수를 통일하여 하나의 클래스 속 3개의 함수로 연결짓는 과정&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1780214927250&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;def create_band(
        _df,
        _col = 'Adj Close',
        _start = '2010-01-01',
        _end = datetime.now(),
        _cnt = 20
):
    
    df = _df.copy()

    if 'Date' in df.columns:
        df.set_index('Date', inplace=True)
    
    df.index = pd.to_datetime(df.index)

    df.index = df.index.tz_localize(None)

    flag = df.isin( [ np.nan, np.inf, -np.inf ] ).any(axis=1)
    df = df.loc[~flag, ]

    df = df[[_col]]

    df['center'] = df[_col].rolling(_cnt).mean()

    std_value = 2 * df[_col].rolling(_cnt).std()
    df['ub'] = df['center'] + std_value
    df['lb'] = df['center'] - std_value

    df = df.loc[_start:_end, ]

    return df&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1780214994075&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df2 = pd.read_csv('../csv/aapl.csv')

band_df = create_band(df2)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;2. 보유 내역 생성 함수&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780215000919&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;def create_trade(_df):
    # 기준 컬럼 이름을 어떻게 알 것인가?: 첫번째 함수의 return으로 나온 df의 첫번째 컬럼만
    col = _df.columns[0]
    df = _df.copy()
    df['trade'] = ''

    for i in df.index:
        if df.loc[i, col] &amp;gt;= df.loc[i, 'ub']:
            # 매도
            df.loc[i, 'trade'] = ''
        elif df.loc[i, col] &amp;lt;= df.loc[i, 'lb']:
            # 매수
            df.loc[i, 'trade'] = 'buy'
        else:
            if df.shift().loc[i, 'trade'] == 'buy':
                df.loc[i, 'trade'] = 'buy'
            else:
                df.loc[i, 'trade'] = ''
    
    return df&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1780215007954&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;trade_df = create_trade(band_df)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;3. 수익률 계산 함수&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780215031996&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;def create_rtn(_df):
    col = _df.columns[0]
    df = _df.copy()
    
    df['rtn'] = 1.0

    # 수익률 계산
    for i in df.index:
        # 매수
        if (df.shift().loc[i, 'trade'] == '') &amp;amp; (df.loc[i, 'trade'] == 'buy'):
            buy = df.loc[i, col]
            print(f&quot;매수일: {i}, 매수가: {buy}&quot;)
            print()
        elif (df.shift().loc[i, 'trade'] == 'buy') &amp;amp; (df.loc[i, 'trade'] == ''):
            sell = df.loc[i, col]
            rtn = sell / buy
            df.loc[i, 'rtn'] = rtn
            print(f&quot;매도일: {i}, 매도가: {sell}, 수익률: {rtn}&quot;)
            print()
    # 누적 수익률
    df['acc_rtn'] = df['rtn'].cumprod()
    # 최종 수익률
    acc_rtn = df.iloc[-1, -1]

    return df, acc_rtn&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1780215038178&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;rtn_df, acc_rtn = create_rtn(trade_df)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;637&quot; data-origin-height=&quot;608&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/chlukl/dJMcabxBmRP/LFxF9Z3AOdvVxEoKcxRxW1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/chlukl/dJMcabxBmRP/LFxF9Z3AOdvVxEoKcxRxW1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/chlukl/dJMcabxBmRP/LFxF9Z3AOdvVxEoKcxRxW1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fchlukl%2FdJMcabxBmRP%2FLFxF9Z3AOdvVxEoKcxRxW1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;286&quot; height=&quot;273&quot; data-origin-width=&quot;637&quot; data-origin-height=&quot;608&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;pre id=&quot;code_1780215077793&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;acc_rtn		# np.float64(1.3923287814461949)
rtn_df&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;837&quot; data-origin-height=&quot;395&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/M4bqU/dJMcaarYG8k/XUCZ1e6GD8uTAcS3d9kzy1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/M4bqU/dJMcaarYG8k/XUCZ1e6GD8uTAcS3d9kzy1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/M4bqU/dJMcaarYG8k/XUCZ1e6GD8uTAcS3d9kzy1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FM4bqU%2FdJMcaarYG8k%2FXUCZ1e6GD8uTAcS3d9kzy1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;511&quot; height=&quot;395&quot; data-origin-width=&quot;837&quot; data-origin-height=&quot;395&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;Class로 묶기&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780215125310&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;class Investing():
    def __init__(self, _df, _col = 'Adj Close', _start = '2010-01-01', _end = datetime.now()):
        self.df = _df
        self.col = _col
        self.start = _start
        self.end = _end
    
    # 바이앤홀드 함수
    def bnh(self):
        df = self.df.copy()
        if 'Date' in df.columns:
            df.set_index('Date', inplace=True)
        df.index = pd.to_datetime(df.index)
        df = df.loc[self.start:self.end, [self.col]]
        buy = df.iloc[0, 0]
        sell = df.iloc[-1, 0]
        return sell / buy
    
    # 볼린저 밴드 함수
    def boll(self, _cnt = 20):
        band_df = create_band(self.df, self.col, self.start, self.end, _cnt)
        trade_df = create_trade(band_df)
        rtn_df, acc_rtn = create_rtn(trade_df)
        return rtn_df, acc_rtn&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;간이 테스트&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780215182816&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df3 = pd.read_csv('../csv/MSFT.csv')
invest = Investing(df3)
invest.bnh()		# np.float64(5.6387313298309785)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;5월 29일&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의 소감&lt;/b&gt;: 그래도 딥러닝을 하다가 pandas 활용하니까 살겠다... &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  Momentum&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;(전월의 수정 주가 / 전년도의 수정 주가) - 1로 구매 타이밍을 잡는 투자 전략&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780236462102&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import numpy as np
from datetime import datetime

df = pd.read_csv('../csv/AMZN.csv', index_col='Date')
df.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;845&quot; data-origin-height=&quot;202&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cVp50t/dJMcadB6Y9Z/3WRRcwdQ9un5VrOjWx8AYk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cVp50t/dJMcadB6Y9Z/3WRRcwdQ9un5VrOjWx8AYk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cVp50t/dJMcadB6Y9Z/3WRRcwdQ9un5VrOjWx8AYk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcVp50t%2FdJMcadB6Y9Z%2F3WRRcwdQ9un5VrOjWx8AYk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;845&quot; height=&quot;202&quot; data-origin-width=&quot;845&quot; data-origin-height=&quot;202&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;데이터 전처리&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780236545812&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# index 시계열로 변환
df.index = pd.to_datetime(df.index)

# index 데이터에서 년-월을 추출하여 STD-YM에 대입
df['STD-YM'] = df.index.strftime('%Y-%m')

# 월말만 모아놓은 데이터
month_last_df = df.groupby('STD-YM').tail(1)

# 전월의 수정 주가, 전년도의 수정 주가 컬럼 생성
month_last_df['BF-1M'] = month_last_df.shift(1)['Adj Close'].fillna(0)
month_last_df['BF-12M'] = month_last_df.shift(12)['Adj Close'].fillna(0)

month_last_df.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;846&quot; data-origin-height=&quot;202&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/clcM9z/dJMcaarYNX0/KaLQw1ZbjEkNvuX3ibzKPK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/clcM9z/dJMcaarYNX0/KaLQw1ZbjEkNvuX3ibzKPK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/clcM9z/dJMcaarYNX0/KaLQw1ZbjEkNvuX3ibzKPK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FclcM9z%2FdJMcaarYNX0%2FKaLQw1ZbjEkNvuX3ibzKPK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;846&quot; height=&quot;202&quot; data-origin-width=&quot;846&quot; data-origin-height=&quot;202&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;거래 내역 추가&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780236625144&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# month_last = 구매 신호(momentum_index)를 확인하기 위함

for i in month_last_df.index:
    signal = ''

    # 절대 모멘텀의 계산식 &amp;rarr; (전월의 수정 주가 / 전년의 수정 주가) - 1
    momentum_index = (month_last_df.loc[i, 'BF-1M'] / month_last_df.loc[i, 'BF-12M']) - 1

    # 0보다 크고 무한대가 아닌 경우가 구매 신호
    flag = (momentum_index &amp;gt; 0) &amp;amp; (momentum_index != np.inf)
    if flag:
        signal = 'buy'

    print(f'날짜: {i}, momentum_index: {momentum_index}, signal: {signal}')

    df.loc[i:, 'trade'] = signal&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;수익률, 누적 수익률 계산&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780236645746&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 수익률, 누적 수익률 계산

df['rtn'] = 1.0

# 수익률 계산
for i in df.index:
    # 매수
    if (df.shift().loc[i, 'trade'] == '') &amp;amp; (df.loc[i, 'trade'] == 'buy'):
        buy = df.loc[i, 'Adj Close']
        print(f&quot;매수일: {i}, 매수가: {buy}&quot;)
    elif (df.shift().loc[i, 'trade'] == 'buy') &amp;amp; (df.loc[i, 'trade'] == ''):
        sell = df.loc[i, 'Adj Close']
        rtn = sell / buy
        df.loc[i, 'rtn'] = rtn
        print(f&quot;매도일: {i}, 매도가: {sell}, 수익률: {rtn}&quot;)
# 누적 수익률
df['acc_rtn'] = df['rtn'].cumprod()
# 최종 수익률
acc_rtn = df.iloc[-1, -1]

acc_rtn&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;847&quot; data-origin-height=&quot;411&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/RFdwn/dJMcahq6VLR/DLPzlvuBFVEyKXF23lrRy0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/RFdwn/dJMcahq6VLR/DLPzlvuBFVEyKXF23lrRy0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/RFdwn/dJMcahq6VLR/DLPzlvuBFVEyKXF23lrRy0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FRFdwn%2FdJMcahq6VLR%2FDLPzlvuBFVEyKXF23lrRy0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;847&quot; height=&quot;411&quot; data-origin-width=&quot;847&quot; data-origin-height=&quot;411&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;번외: BuyandHold&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1780236731151&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# buyandhold 수익률 계산

buy = df['Adj Close'].iloc[0]
sell = df['Adj Close'].iloc[-1]

print(sell/buy)
	# 974.2744757914&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  함수화&lt;/b&gt;&lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;1. STD-YM 생성 함수&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780236806129&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;def create_ym(_df, _col = 'Adj Close'):
    df = _df.copy()
    # Date가 column에 포함되어 있는가?
    if 'Date' in df.columns:
        df.set_index('Date', inplace = True)
    df.index = pd.to_datetime(df.index)
    df.index = df.index.tz_localize(None)
    flag = df.isin([np.nan, np.inf, -np.inf]).any(axis=1)
    df = df.loc[~flag, [_col]]
    df['STD-YM'] = df.index.strftime('%Y-%m')

    return df&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1780236817044&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df = pd.read_csv('../csv/AAPL.csv')
ym_df = create_ym(df)
ym_df.info()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;846&quot; data-origin-height=&quot;183&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bxlpYk/dJMcagyUX9y/qtQLpqV0hk9qSMOKpXK7R0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bxlpYk/dJMcagyUX9y/qtQLpqV0hk9qSMOKpXK7R0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bxlpYk/dJMcagyUX9y/qtQLpqV0hk9qSMOKpXK7R0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbxlpYk%2FdJMcagyUX9y%2FqtQLpqV0hk9qSMOKpXK7R0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;846&quot; height=&quot;183&quot; data-origin-width=&quot;846&quot; data-origin-height=&quot;183&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;2. 월말 데이터 생성, BF1, BF2 column 생성 함수&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780236901885&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;def create_month(
        _df,
        _start = '2010-01-01',
        _end = datetime.now(),
        _momentum = 12,
        _last = 1
):

    if _last == 1:
        df = _df.groupby('STD-YM').tail(1)
    elif _last == 0:
        df = _df.groupby('STD-YM').head(1)
    else:
        return &quot;_last 값은 0 또는 1만 가능합니다.&quot;
    
    col = _df.columns[0]

    df['BF1'] = df.shift(1)[col].fillna(0)
    df['BF2'] = df.shift(_momentum)[col].fillna(0)
    
    df = df.loc[ _start : _end, ]
    return df&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1780236913010&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;month_df = create_month(ym_df)
month_df.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;841&quot; data-origin-height=&quot;197&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/HZVme/dJMcaf038hq/4nhv2LlNh7fKADHXAKGM50/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/HZVme/dJMcaf038hq/4nhv2LlNh7fKADHXAKGM50/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/HZVme/dJMcaf038hq/4nhv2LlNh7fKADHXAKGM50/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FHZVme%2FdJMcaf038hq%2F4nhv2LlNh7fKADHXAKGM50%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;841&quot; height=&quot;197&quot; data-origin-width=&quot;841&quot; data-origin-height=&quot;197&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;3. 거래 내역 추가, 수익률 계산 함수&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1780236952004&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;def create_rtn(_df1, _df2, _score = 1):
    df = _df1.copy()

    df['trade'] = ''
    df['rtn'] = 1.0

    col = df.columns[0]

    # _df2를 이용해서 거래 내역을 생성
    for i in _df2.index:
        signal = ''
        
        # momentum 계산
        momentum_index = _df2.loc[i, 'BF1'] / _df2.loc[i, 'BF2'] - _score
        flag = (momentum_index &amp;gt; 0) &amp;amp; (momentum_index != np.inf)
        
        if flag:
            signal = 'buy'
        
        # 거래 내역 생성
        df.loc[i:, 'trade'] = signal
        print(f'날짜: {i}, momentum_index: {momentum_index}, signal: {signal}')
    
    # 수익률 계산
    for i in df.index:
        if (df.shift(1).loc[i, 'trade'] == '') &amp;amp; (df.loc[i, 'trade'] == 'buy'):
            buy = df.loc[i, col]
            print(f'매수일: {i}, 매수가: {buy}')
        elif (df.shift(1).loc[i, 'trade'] == 'buy') &amp;amp; (df.loc[i, 'trade'] == ''):
            sell = df.loc[i, col]
            rtn = sell / buy
            df.loc[i, 'rtn'] = rtn
            print(f'매도일: {i}, 매도가: {sell}, 수익률: {rtn}')
    
    # 누적 수익률 계산
    df['acc_rtn'] = df['rtn'].cumprod()
    acc_rtn = df.iloc[-1, -1]

    return df, acc_rtn&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1780236973292&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df_final, acc_rtn = create_rtn(ym_df, month_df)

print(acc_rtn)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;847&quot; data-origin-height=&quot;608&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ckqQRz/dJMcaaZMnrL/kg0yacSCB5gl91Nc5HIKd1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ckqQRz/dJMcaaZMnrL/kg0yacSCB5gl91Nc5HIKd1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ckqQRz/dJMcaaZMnrL/kg0yacSCB5gl91Nc5HIKd1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FckqQRz%2FdJMcaaZMnrL%2Fkg0yacSCB5gl91Nc5HIKd1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;403&quot; height=&quot;289&quot; data-origin-width=&quot;847&quot; data-origin-height=&quot;608&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;8주차 소감&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;다음 주 자연어 처리라서 쉬고 가는 타이밍을 만들어주셨는데 벌써부터 걱정된다... &lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;한글이 자연어 처리 중 가장 어렵다고 하는데... 물론 수업은 영어로 할 거지만 괜히 한글 자연어 처리에 대한 욕심이 생겼다고 할까?&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;원래 아무것도 모를 때 용감하다고 하지... 정신 바짝 차리자 &lt;/p&gt;</description>
      <category>데이터 분석가 부트캠프</category>
      <category>멀티캠퍼스부트캠프</category>
      <category>부트캠프</category>
      <author>가라어퍼</author>
      <guid isPermaLink="true">https://bbgw-oshoulder.tistory.com/8</guid>
      <comments>https://bbgw-oshoulder.tistory.com/8#entry8comment</comments>
      <pubDate>Sun, 31 May 2026 23:30:11 +0900</pubDate>
    </item>
    <item>
      <title>7주차 Note: 머신러닝/딥러닝</title>
      <link>https://bbgw-oshoulder.tistory.com/7</link>
      <description>&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;KakaoTalk_20260524_173242269.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bmzN1O/dJMcaaSXBy2/nDsO66pZBlMnkLYBmrJeq0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bmzN1O/dJMcaaSXBy2/nDsO66pZBlMnkLYBmrJeq0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bmzN1O/dJMcaaSXBy2/nDsO66pZBlMnkLYBmrJeq0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbmzN1O%2FdJMcaaSXBy2%2FnDsO66pZBlMnkLYBmrJeq0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;495&quot; height=&quot;264&quot; data-filename=&quot;KakaoTalk_20260524_173242269.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt; ️ 7주차: 5월 18일 ~ 5월 22일&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;멀티캠퍼스&lt;/span&gt;&lt;span&gt;&amp;nbsp;&lt;/span&gt;부트캠프 7주차 요약&lt;/b&gt;✍️&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/18 ]&lt;span&gt;&lt;b&gt; 머신러닝&lt;/b&gt;: XGBoost, PCA, K-Means, t-SNE&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/19 ]&lt;span&gt;&lt;b&gt; 머신러닝&lt;/b&gt;: DBSCAN&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/20 ]&lt;span&gt;&lt;b&gt; 딥러닝&lt;/b&gt;: 이론 학습. 본문에서는 생략&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/21 ]&lt;span&gt;&lt;b&gt; 딥러닝&lt;/b&gt;: torch&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/22 ]&lt;span&gt;&lt;b&gt; 딥러닝&lt;/b&gt;: RNN&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 18일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘의 소감:&lt;/span&gt;&lt;/b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt; 월요일 힘들지만... 힘들다 응.. 힘들어. ㅋㅋㅋㅋㅋ정신줄 붙잡자 &lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  XGBoost&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;부스팅의 대표적인 모델로, 빠른 속도, 높은 성능, 과적합 제어 기능을 제공하는 트리 기반 앙상블 모델&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;body dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1779092445370&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, mean_squared_error, r2_score
import pandas as pd
import numpy as np

body = pd.read_csv('../data/bodyPerformance.csv')
body.head(3)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;843&quot; data-origin-height=&quot;157&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/b6tU5X/dJMcador1Gb/nil9WKTAK5UrAq2g1p6Kg1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/b6tU5X/dJMcador1Gb/nil9WKTAK5UrAq2g1p6Kg1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/b6tU5X/dJMcador1Gb/nil9WKTAK5UrAq2g1p6Kg1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb6tU5X%2FdJMcador1Gb%2Fnil9WKTAK5UrAq2g1p6Kg1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;843&quot; height=&quot;157&quot; data-origin-width=&quot;843&quot; data-origin-height=&quot;157&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779092699001&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 성별/등급을 수치형 데이터로 변환

body['gender'] = np.where(
    body['gender'] == 'M', 0, 1
)

body['class'] = body['class'].map({'A': 0, 'B': 1, 'C': 2, 'D': 3})

body.head(3)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;845&quot; data-origin-height=&quot;161&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cAneH2/dJMcai4nss7/dFK2KgefsTnsKFKiClmfVk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cAneH2/dJMcai4nss7/dFK2KgefsTnsKFKiClmfVk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cAneH2/dJMcai4nss7/dFK2KgefsTnsKFKiClmfVk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcAneH2%2FdJMcai4nss7%2FdFK2KgefsTnsKFKiClmfVk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;845&quot; height=&quot;161&quot; data-origin-width=&quot;845&quot; data-origin-height=&quot;161&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779092935118&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = body.drop('class', axis = 1)
y = body['class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, \
random_state = 42, stratify = y)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779092967256&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# XGBoost 분류 모델을 생성 (다중 분류 모델 &amp;rarr; softmax 기반)

clf = XGBClassifier(
    n_estimators = 1000,
    learning_rate = 0.05,
    objective = 'multi:softprob',
    eval_metric = 'mlogloss',
    max_depth = 5,
    min_child_weight = 2,
    subsample = 0.8,
    colsample_bytree = 0.8,
    early_stopping_rounds = 50,
    random_state = 42,
    tree_method = 'hist'
)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1779093060975&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;clf.fit(X_train, y_train, eval_set = [(X_test, y_test)], verbose = 50)
print(&quot;Best Iteration: &quot;, clf.best_iteration)
print(&quot;Best Score: &quot;, clf.best_score)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;365&quot; data-origin-height=&quot;351&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bk4cE8/dJMcaf0U0CC/joEMTEOHDA0lwIwmJaZS7K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bk4cE8/dJMcaf0U0CC/joEMTEOHDA0lwIwmJaZS7K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bk4cE8/dJMcaf0U0CC/joEMTEOHDA0lwIwmJaZS7K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fbk4cE8%2FdJMcaf0U0CC%2FjoEMTEOHDA0lwIwmJaZS7K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;365&quot; height=&quot;351&quot; data-origin-width=&quot;365&quot; data-origin-height=&quot;351&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;296&quot; data-origin-height=&quot;42&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/nsys4/dJMcagMmhgW/KlKeYaX1zfQlL0X3hgTUNk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/nsys4/dJMcagMmhgW/KlKeYaX1zfQlL0X3hgTUNk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/nsys4/dJMcagMmhgW/KlKeYaX1zfQlL0X3hgTUNk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fnsys4%2FdJMcagMmhgW%2FKlKeYaX1zfQlL0X3hgTUNk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;296&quot; height=&quot;42&quot; data-origin-width=&quot;296&quot; data-origin-height=&quot;42&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779093242685&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;clf_pred = clf.predict(X_test)
print(confusion_matrix(y_test, clf_pred))
print()
print(classification_report(y_test, clf_pred))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;446&quot; data-origin-height=&quot;307&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/463Sv/dJMcacDbCMt/vmt0fesIisUGdOAzwRc6L1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/463Sv/dJMcacDbCMt/vmt0fesIisUGdOAzwRc6L1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/463Sv/dJMcacDbCMt/vmt0fesIisUGdOAzwRc6L1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F463Sv%2FdJMcacDbCMt%2Fvmt0fesIisUGdOAzwRc6L1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;352&quot; height=&quot;242&quot; data-origin-width=&quot;446&quot; data-origin-height=&quot;307&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;hyper parameter들을 조정해주며 성능을 더욱 향상시킬 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  PCA&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;차원 축소 기법으로, 고차원 데이터를 상관관계가 없는 새로운 축으로 변환하며, 이때 데이터의 분산을 최대한 보존하면서 축소한다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;iris dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1779093729303&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, SVR
from sklearn.metrics import classification_report, r2_score

iris = pd.read_csv('../csv/iris.csv')
iris.head()

X = iris.drop('species', axis = 1)
y = iris['species']&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;492&quot; data-origin-height=&quot;176&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bpmM17/dJMcah5yZVP/TfIGx8wdLIhWjlzkJ94Xk1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bpmM17/dJMcah5yZVP/TfIGx8wdLIhWjlzkJ94Xk1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bpmM17/dJMcah5yZVP/TfIGx8wdLIhWjlzkJ94Xk1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbpmM17%2FdJMcah5yZVP%2FTfIGx8wdLIhWjlzkJ94Xk1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;327&quot; height=&quot;117&quot; data-origin-width=&quot;492&quot; data-origin-height=&quot;176&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style7&quot; /&gt;
&lt;pre id=&quot;code_1779094059066&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;pca = PCA()
X_pca = pca.fit_transform(X, y)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779094110135&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 42, stratify = y
)

# KFold 생성
cv = StratifiedKFold(
    n_splits = 5, shuffle = True, random_state = 42
)

# Pipeline 생성: Scaler &amp;rarr; PCA &amp;rarr; SVC
pipe = Pipeline(
    [
        ('scaler', StandardScaler()),
        ('pca', PCA(random_state = 42)),
        ('svc', SVC(random_state = 42, probability = True))
    ]
)

# 최적의 parameter를 찾기 위한 parameter의 조합
params = {
    'pca__n_components' : [None, 2, 3],
    'svc__C' : [0.1, 1, 10],
    'svc__gamma': ['scale', 'auto'],
    'svc__kernel' : ['linear', 'rbf']
}

# GridSearchCV 객체를 생성
grid = GridSearchCV(
    estimator = pipe,
    param_grid = params,
    scoring = 'accuracy',
    cv = cv,
    n_jobs = -1,
    verbose = 1,
    refit = True
)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1779094156548&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;grid.fit(X_train, y_train)

print('Best estimator: ', grid.best_estimator_)
print('Best Parameter: ', grid.best_params_)
print(classification_report(
    y_test, grid.predict(X_test)
))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;841&quot; data-origin-height=&quot;282&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/drFQRg/dJMcaiXCwvy/llxOe6aAmQ7nbsXsYBDqS0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/drFQRg/dJMcaiXCwvy/llxOe6aAmQ7nbsXsYBDqS0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/drFQRg/dJMcaiXCwvy/llxOe6aAmQ7nbsXsYBDqS0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdrFQRg%2FdJMcaiXCwvy%2FllxOe6aAmQ7nbsXsYBDqS0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;561&quot; height=&quot;282&quot; data-origin-width=&quot;841&quot; data-origin-height=&quot;282&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  K-means&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;데이터를 k개의 그룹으로 자동 분류하는 비지도 학습 알고리즘으로,&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그룹의 중심점을 반복적으로 계산하여 서로 가까운 데이터끼리 묶어 군집을 형성한다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;iris dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1779094342956&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score, adjusted_rand_score

iris = pd.read_csv('../csv/iris.csv')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779094524804&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = iris.drop('species', axis = 1)
y = iris['species']

X_std = StandardScaler().fit_transform(X)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779094552353&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# Kmeans 객체 생성
model = KMeans(
    n_clusters = 3,
    random_state = 42,
    n_init = 10
)

model.fit(X_std)
labels = model.predict(X_std)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779094585037&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 검증 지표 (군집이 얼마나 잘 되었는가?)
inertia = model.inertia_
silhouette = silhouette_score(X_std, labels)
chs = calinski_harabasz_score(X_std, labels)
dbs = davies_bouldin_score(X_std, labels)

# 실제 라벨과의 계산
ari = adjusted_rand_score(y, labels)

print('inertia: ', round(inertia, 3))
print('silhouette: ', round(silhouette, 3))
print('calinski: ', round(chs, 3))
print('davies: ', round(dbs, 3))
print('ARI: ', round(ari, 3))&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;inertia:&amp;nbsp;&amp;nbsp;139.82 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;silhouette:&amp;nbsp;&amp;nbsp;0.46 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;calinski:&amp;nbsp;&amp;nbsp;241.904 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;davies:&amp;nbsp;&amp;nbsp;0.834 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;ARI:&amp;nbsp;&amp;nbsp;0.62&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이때 &lt;b&gt;inertia_&lt;/b&gt;, &lt;b&gt;davies_bouldin_score&lt;/b&gt;는 &lt;b&gt;작을수록&lt;/b&gt; 좋고, &lt;b&gt;calinski_harabasz_score&lt;/b&gt;는 &lt;b&gt;높을수록&lt;/b&gt; 좋으며,&lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;silhouette_score&lt;/b&gt;, &lt;b&gt;adjusted_rand_score&lt;/b&gt;은 &lt;b&gt;1에 가까울수록&lt;/b&gt; 좋다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779094859532&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 군집의 상황을 2차원 그래프로 시각화 &amp;rarr; 차원 축소
pca = PCA(n_components=2)

X_pca = pca.fit_transform(X_std)

# pca를 이용해서, 중심점을 2차원으로 축소
center_pca = pca.transform(model.cluster_centers_)

plt.figure(figsize = (12, 8))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c = labels, cmap = 'viridis', s = 40, alpha = 0.7)
plt.scatter(center_pca[:, 0], center_pca[:, 1], c = 'black', s = 100, marker = 'X', label='center')
plt.legend()
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리55.png&quot; data-origin-width=&quot;980&quot; data-origin-height=&quot;659&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/5Om6k/dJMcad24YJ8/fdS4EQSsgTyYM7sbf18wQk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/5Om6k/dJMcad24YJ8/fdS4EQSsgTyYM7sbf18wQk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/5Om6k/dJMcad24YJ8/fdS4EQSsgTyYM7sbf18wQk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F5Om6k%2FdJMcad24YJ8%2FfdS4EQSsgTyYM7sbf18wQk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;430&quot; height=&quot;659&quot; data-filename=&quot;티스토리55.png&quot; data-origin-width=&quot;980&quot; data-origin-height=&quot;659&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  t-SNE&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;비선형 차원 축소 기법으로, 고차원 데이터의 국소 구조를 보존하면서 2차원 또는 3차원 시각화를 하기 위해 사용한다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;고차원 데이터의 경우에는 거리 기반 유사도를 사용하며, 저차원의 경우에는 t-분포를 이용해 확률적으로 비슷하게 재현한다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;digits dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;digits dataset은 숫자를 나타내는 데 필요한 데이터를 담은 dataset이다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;해당 dataset에 포함된 image column을 활용하여 시각화를 하면 다음과 같은 이미지가 도출된다.&lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;각 데이터가 0 ~ 9 중 어느 숫자에 속하는지 시각화하는 작업을 수행할 것이다.&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리57.png&quot; data-origin-width=&quot;407&quot; data-origin-height=&quot;212&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/Qrd4G/dJMcacpDFoO/NxkrZoWa0CgP7QeF48Jnr1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/Qrd4G/dJMcacpDFoO/NxkrZoWa0CgP7QeF48Jnr1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/Qrd4G/dJMcacpDFoO/NxkrZoWa0CgP7QeF48Jnr1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FQrd4G%2FdJMcacpDFoO%2FNxkrZoWa0CgP7QeF48Jnr1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;227&quot; height=&quot;118&quot; data-filename=&quot;티스토리57.png&quot; data-origin-width=&quot;407&quot; data-origin-height=&quot;212&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;pre id=&quot;code_1779095284363&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

digits = load_digits()
pd.DataFrame(digits.data)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;823&quot; data-origin-height=&quot;372&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bs6brv/dJMcagZO1cV/jx0BgcJtAyafS1UTrW1VF0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bs6brv/dJMcagZO1cV/jx0BgcJtAyafS1UTrW1VF0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bs6brv/dJMcagZO1cV/jx0BgcJtAyafS1UTrW1VF0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fbs6brv%2FdJMcagZO1cV%2Fjx0BgcJtAyafS1UTrW1VF0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;593&quot; height=&quot;268&quot; data-origin-width=&quot;823&quot; data-origin-height=&quot;372&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;column이 64개다 = 64차원이다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779095375635&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;tsne = TSNE(
    n_components=2,
    perplexity=40,
    random_state=42,
    n_jobs=-1
)

X = StandardScaler().fit_transform(digits['data'])
y = digits['target']

X_tsne = tsne.fit_transform(X)

print(X.shape)			# (1797, 64)
print(X_tsne.shape)		# (1797, 2)&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;기존 64차원에서 2차원으로 줄어든 모습을 볼 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779095419811&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.figure(figsize=(16, 10))

sc = plt.scatter(
    X_tsne[:, 0], X_tsne[:, 1], c = y, cmap = 'cool', s = 40
)

plt.legend(*sc.legend_elements(), title='Digits')
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리56.png&quot; data-origin-width=&quot;1298&quot; data-origin-height=&quot;813&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/HeQWy/dJMcaaeh33Z/Y2dUjfKSsXjpwuAc7eBRsK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/HeQWy/dJMcaaeh33Z/Y2dUjfKSsXjpwuAc7eBRsK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/HeQWy/dJMcaaeh33Z/Y2dUjfKSsXjpwuAc7eBRsK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FHeQWy%2FdJMcaaeh33Z%2FY2dUjfKSsXjpwuAc7eBRsK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;546&quot; height=&quot;813&quot; data-filename=&quot;티스토리56.png&quot; data-origin-width=&quot;1298&quot; data-origin-height=&quot;813&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;각 target 별로 데이터가 뭉쳐져 있는 모습을 확인할 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 19일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘의 소감:&amp;nbsp; &lt;/span&gt;&lt;/b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;대학교에서 진행했던 팀플에서&lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt; DBSCAN을 사용할 뻔한 적이 있었는데,&lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그때만 해도 관련 과정을 많이 듣지 않았던 때라 뭔지 몰라서 헤매었던 기억이 있다.&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;(배운 것 이외의 방법이 나오면 많이 쫄았었지...)&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;많은 방법을 접하고 있는 지금을 새로이 도전하는 것을 두려워하지 않는 계기로도 발전시켜나갔으면 좋겠다.&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  DBSCAN&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;밀도 기반의 클러스터링 알고리즘으로, 데이터의 밀도를 이용하여 군집을 생성한다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;body dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1779167631702&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import silhouette_score, adjusted_rand_score
from sklearn.decomposition import PCA

body = pd.read_csv('../data/bodyPerformance.csv')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;늘 그랬듯 범주형 데이터를 수치로 변환하고, 독립/종속으로 분할&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1779178331117&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;obj_cols = body.select_dtypes('object').columns
le = LabelEncoder()

for col in obj_cols:
    body[col] = le.fit_transform(body[col])
    
X = body.drop(['class'], axis = 1)
y1 = body['class']
y2 = body['gender']

X_std = StandardScaler().fit_transform(X)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;X 데이터에서 k번째 최근접 이웃의 거리를 계산&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;각각의 거리 데이터에서 10번째(마지막) 이웃의 거리만 추출한 뒤 오름차순으로 정렬하여 kth_list에 저장&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1779178499193&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;nbrs = NearestNeighbors(n_neighbors=10).fit(X_std)
distances, idxs = nbrs.kneighbors(X_std)
kth_list = np.sort(distances[:, -1])&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1779178575529&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.figure(figsize=(5, 5))
plt.plot(kth_list)
plt.grid(True)
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리58.png&quot; data-origin-width=&quot;455&quot; data-origin-height=&quot;428&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ccZ4RM/dJMcaak14Yy/Md52kk2EzVs128WvKYu1eK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ccZ4RM/dJMcaak14Yy/Md52kk2EzVs128WvKYu1eK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ccZ4RM/dJMcaak14Yy/Md52kk2EzVs128WvKYu1eK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FccZ4RM%2FdJMcaak14Yy%2FMd52kk2EzVs128WvKYu1eK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;259&quot; height=&quot;244&quot; data-filename=&quot;티스토리58.png&quot; data-origin-width=&quot;455&quot; data-origin-height=&quot;428&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;x축(정렬된 데이터의 인덱스)이 13,000 근처인 지점에서 그래프가 확 꺾이는 것을 볼 수 있는데, 이 꺾인 뒤의 값들은 노이즈라고 생각할 수 있으며,&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;해당 부분의 y값을 eps로 잡아주면 된다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1779178899078&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;diffs = np.diff(kth_list)
eps_ = np.where(diffs&amp;gt;0.05)[0]
print('급변하는 인덱스의 값: ', eps_)
print('대략적인 eps의 최적값: ', kth_list[eps_[0]])

# 급변하는 인덱스의 값:  [13357 13370 13371 13374 13376 13378 13380 13382 13383 13384 13385 13386 13387 13388 13389 13390 13391]
# 대략적인 eps의 최적값:  3.051458810772378&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;DBSCAN&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779179598629&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;db = DBSCAN(
    eps = 1.7,
    min_samples = 10,
    n_jobs = -1
)

labels = db.fit_predict(X_std)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1779181301274&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;set(labels)

db = DBSCAN(
    eps = 1.7,
    min_samples = 10,
    n_jobs = -1
)

labels = db.fit_predict(X_std)

# 검증 지표
# 노이즈 제외하고 검증
flag = labels != -1
sil = silhouette_score(X_std[flag], labels[flag])
ari = adjusted_rand_score(y1, labels)
print(sil)
print(ari)

# 0.3466129480317822
# 0.0049927528548928055&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;막상 eps를 3으로 지정하니까 군집이 하나만 생성되었기에, eps값을 1.7로 낮추어 분석을 진행하였다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779181373683&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# PCA를 이용하여 2차원으로 차원 축소하고 그래프 시각화
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(X_std)

# 노이즈 / 비노이즈 부분을 다르게 표시
non_noise = labels != -1

# 비노이즈 산점도 그래프 표시
plt.scatter(X_pca[non_noise, 0], X_pca[non_noise, 1], \
            c = labels[non_noise], cmap = 'cool', label = 'Cluster', alpha = 0.5)

# 노이즈 산점도 그래프 표시
plt.scatter(X_pca[~non_noise, 0], X_pca[~non_noise, 1], c = 'black', label = 'Noise', alpha = 0.3)

plt.legend()
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리59.png&quot; data-origin-width=&quot;546&quot; data-origin-height=&quot;413&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/yHJ0Q/dJMcaiwBxwC/ETbNYSukVTvqGcMGRqZ2W0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/yHJ0Q/dJMcaiwBxwC/ETbNYSukVTvqGcMGRqZ2W0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/yHJ0Q/dJMcaiwBxwC/ETbNYSukVTvqGcMGRqZ2W0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FyHJ0Q%2FdJMcaiwBxwC%2FETbNYSukVTvqGcMGRqZ2W0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;325&quot; height=&quot;246&quot; data-filename=&quot;티스토리59.png&quot; data-origin-width=&quot;546&quot; data-origin-height=&quot;413&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 21일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘의 소감: &lt;/span&gt;&lt;/b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;학교에서는 Tensorflow로만 하다가 Torch를 하니까 좀 어색한 느낌이 있다... &lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;코드를 다시 살펴보며 꼼꼼히 복습해야겠다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  Torch - 선형 모델&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1779460946457&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import torch
import torch.nn as nn
import torch.optim as optim&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;torch: 데이터를 tensor type으로 변환하기 위함&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;nn: 기본 뼈대&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;optim: 계산된 기울기를 이용해 모델의 파라미터를 갱신하는 optimizer 제공&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;독립, 종속변수 tensor로 생성&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779461015538&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
y = torch.tensor([[3.0], [5.0], [7.0], [9.0]])&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779461109626&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 순전파 (모델 학습 &amp;rarr; 예측)

class LinearReg(nn.Module):
    def __init__(self):
        super(LinearReg, self).__init__()
        self.linear = nn.Linear(1, 1)
    
    def forward(self, x):
        return self.linear(x)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;클래스 생성&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779461151733&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;model = LinearReg()&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;손실함수, 옵티마이저 설정&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779461193373&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;모델 학습 (1번)&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779461374952&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 순전파 (생성된 모델을 호출하면 forward() 함수를 호출하도록 nn.Module에서 설정되어있음)
pred = model(X)
# LinearReg 클래스 안의 forward 함수를 호출하여 독립변수(x)를 인자값으로 사용한다.

# 손실 함수
loss = criterion(pred, y)

# 기울기 초기화
optimizer.zero_grad()

# 역전파 (자동 미분) &amp;rarr; 손실을 줄이기 위한 가중치의 기울기(방향)를 계산한다
loss.backward()

# 가중치를 업데이트 (파라미터(모델) 수정)
optimizer.step()

# loss 값을 확인
print(loss)
# tensor(49.5290, grad_fn=&amp;lt;MseLossBackward0&amp;gt;)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;모델 평가&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779461653853&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# DL 모델은 반복 학습이 기본 설정 &amp;rarr; 학습 모드를 평가 모드로 전환
# eval(): 모델을 평가모드로 전환
# train(): 모델을 학습 모드로 전환
model.eval()

# 예측, 평가 (메모리의 사용량을 줄이기 위해서 기울기의 계산을 잠시 비활성화)
with torch.no_grad():
    y_pred = model(X)
    loss = criterion(y_pred, y)
    print(y_pred)
    print(loss)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;848&quot; data-origin-height=&quot;112&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/KNZ9C/dJMcaf0ZdLp/nvhdo5rMvJP4l4cGxP5ku0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/KNZ9C/dJMcaf0ZdLp/nvhdo5rMvJP4l4cGxP5ku0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/KNZ9C/dJMcaf0ZdLp/nvhdo5rMvJP4l4cGxP5ku0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FKNZ9C%2FdJMcaf0ZdLp%2Fnvhdo5rMvJP4l4cGxP5ku0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;848&quot; height=&quot;112&quot; data-origin-width=&quot;848&quot; data-origin-height=&quot;112&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779461681294&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 반복 학습을 통해서 가중치와 편향을 변화시킨다.
epochs = 200
model.train()

for epoch in range(epochs):
    # 순전파
    pred = model(X)
    # 손실 함수
    loss = criterion(pred, y)
    # 기울기 초기화
    optimizer.zero_grad()
    # 자동 미분(역전파) &amp;rarr; 가중치의 방향을 제시
    loss.backward()
    # 가중치를 업데이트
    optimizer.step()

    if (epoch + 1) % 20 == 0:
        print(f&quot;Epoch: [{epoch+1}, 200], Loss: {round(loss.item(), 6)}&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;844&quot; data-origin-height=&quot;206&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/eIRxLk/dJMcahdsVQ8/ddgfXMB14k7ZYGlXQVku90/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/eIRxLk/dJMcahdsVQ8/ddgfXMB14k7ZYGlXQVku90/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/eIRxLk/dJMcahdsVQ8/ddgfXMB14k7ZYGlXQVku90/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FeIRxLk%2FdJMcahdsVQ8%2FddgfXMB14k7ZYGlXQVku90%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;844&quot; height=&quot;206&quot; data-origin-width=&quot;844&quot; data-origin-height=&quot;206&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;반복학습 후 모델 평가&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779461738449&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;model.eval()

with torch.no_grad():
    y_pred = model(X)
    loss = criterion(y_pred, y)
    print(y_pred)
    print(loss)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;846&quot; data-origin-height=&quot;112&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/52L9u/dJMcahkgLNQ/4j2KMLDaVqSAk6LQkIXtN1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/52L9u/dJMcahkgLNQ/4j2KMLDaVqSAk6LQkIXtN1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/52L9u/dJMcahkgLNQ/4j2KMLDaVqSAk6LQkIXtN1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F52L9u%2FdJMcahkgLNQ%2F4j2KMLDaVqSAk6LQkIXtN1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;846&quot; height=&quot;112&quot; data-origin-width=&quot;846&quot; data-origin-height=&quot;112&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  Torch - 비선형 모델: 회귀&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;california dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1779591497888&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

data = fetch_california_housing()&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1779591527474&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = data['data']
y = data['target']

print(X.shape, y.shape)
# (20640, 8) (20640,)

# 1차원 데이터를 2차원으로 변경
y = y.reshape(-1, 1)

# 학습, 평가 데이터로 분할
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 42
)

# Tensor 형태로 변환
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779591629234&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 비선형 모델 생성 (선형 모델 &amp;rarr; 활성화 함수 &amp;rarr; 선형 모델)

class Reg2(nn.Module):
    def __init__ (self, _dim):
        super(Reg2, self).__init__()
        # 다층 퍼셉트론 안에 선형 모델 &amp;rarr; 활성화 함수 &amp;rarr; 선형 모델
        self.model = nn.Sequential(
            # 첫번째 레이어
            nn.Linear(_dim, _dim),
            # 비선형 구조 파악용 활성화 함수
            nn.ReLU(),
            nn.Linear(_dim, 1)
        )
    
    def forward(self, x):
        return self.model(x)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1779591673584&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;model3 = Reg2(n_feature)
criterion3 = nn.MSELoss()
optimizer3 = optim.SGD(model3.parameters(), lr = 0.01)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779591697224&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 반복 학습
model3.train()

for epoch in range(300):
    n = epoch + 1
    pred3 = model3(X_train_sc)
    loss3 = criterion3(pred3, y_train_tensor)
    optimizer3.zero_grad()
    loss3.backward()
    optimizer3.step()

    if n % 30 == 0:
        print(f&quot;Epoch [{n} / 300], Loss: {round(loss3.item(), 6)}&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;853&quot; data-origin-height=&quot;204&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ladym/dJMcagFFpYu/XdhvK32KkSlmYqHa6ynHsK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ladym/dJMcagFFpYu/XdhvK32KkSlmYqHa6ynHsK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ladym/dJMcagFFpYu/XdhvK32KkSlmYqHa6ynHsK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fladym%2FdJMcagFFpYu%2FXdhvK32KkSlmYqHa6ynHsK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;853&quot; height=&quot;204&quot; data-origin-width=&quot;853&quot; data-origin-height=&quot;204&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779592658317&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;model3.eval()
with torch.no_grad():
    pred3 = model3(X_test_sc)
    loss3 = criterion3(pred3, y_test_tensor)

for i in range(10):
    print(f&quot;실제 데이터: {y_test[i]}, 예측 데이터: {pred3[i].item()}&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;845&quot; data-origin-height=&quot;203&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/zNJnJ/dJMcabK8RIj/H6KK57jHCcVoFJbEJ6G4Kk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/zNJnJ/dJMcabK8RIj/H6KK57jHCcVoFJbEJ6G4Kk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/zNJnJ/dJMcabK8RIj/H6KK57jHCcVoFJbEJ6G4Kk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FzNJnJ%2FdJMcabK8RIj%2FH6KK57jHCcVoFJbEJ6G4Kk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;845&quot; height=&quot;203&quot; data-origin-width=&quot;845&quot; data-origin-height=&quot;203&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span&gt;  Torch - 비선형 모델: 분류&lt;/span&gt;&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;iris dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1779594175323&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
df = pd.read_csv('../csv/iris.csv')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779594264469&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = df.drop('species', axis = 1)
y = df['species']

le = LabelEncoder()
y = le.fit_transform(y)
y

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 42, stratify = y
)

# Scaling
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train)
X_test_sc = scaler.transform(X_test)

# Tensor 형태로 변환
X_train_tensor = torch.tensor(X_train_sc, dtype = torch.float32)
X_test_tensor = torch.tensor(X_test_sc, dtype = torch.float32)
y_train_tensor = torch.tensor(y_train, dtype = torch.long)
y_test_tensor = torch.tensor(y_test, dtype = torch.long)
	# 분류의 경우에는 종속변수를 long 타입으로 잡는다.&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779595401727&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;class clf(nn.Module):
    def __init__(self, _dim):
        super(clf, self).__init__()
        self.model = nn.Linear(_dim, 3)
    
    def forward(self, x):
        return self.model(x)

clf_model = clf(X.shape[1])
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(clf_model.parameters(), lr = 0.01)
pred = clf_model(X_train_tensor)
pred&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779595429116&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;clf_model.train()

for epoch in range(300):
    pred = clf_model(X_train_tensor)
    loss = criterion(pred, y_train_tensor)
    optimizer.zero_grad()       # 기울기 초기화
    loss.backward()             # 자동 미분
    optimizer.step()            # 가중치 업데이트
    n = epoch + 1
    
    if n % 30 == 0:
        print(f'Epoch: [{n} / 300], Loss: {round(loss.item(), 6)}')&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;844&quot; data-origin-height=&quot;200&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/czJ5Qi/dJMcahdtxHg/EtxKHgV490jm5x3qAupKs0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/czJ5Qi/dJMcahdtxHg/EtxKHgV490jm5x3qAupKs0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/czJ5Qi/dJMcahdtxHg/EtxKHgV490jm5x3qAupKs0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FczJ5Qi%2FdJMcahdtxHg%2FEtxKHgV490jm5x3qAupKs0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;844&quot; height=&quot;200&quot; data-origin-width=&quot;844&quot; data-origin-height=&quot;200&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1779595474696&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;clf_model.eval()

with torch.no_grad():
    pred = clf_model(X_test_tensor)
    _, pred_idx = torch.max(pred, 1)

acc = accuracy_score(y_test, pred_idx)
print('정확도: ', round(acc, 4))
print(classification_report(y_test, pred_idx))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;848&quot; data-origin-height=&quot;210&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/brhGAS/dJMcagFFqsu/ZE0P4kiCyadStE1ZMTBKv0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/brhGAS/dJMcagFFqsu/ZE0P4kiCyadStE1ZMTBKv0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/brhGAS/dJMcagFFqsu/ZE0P4kiCyadStE1ZMTBKv0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbrhGAS%2FdJMcagFFqsu%2FZE0P4kiCyadStE1ZMTBKv0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;848&quot; height=&quot;210&quot; data-origin-width=&quot;848&quot; data-origin-height=&quot;210&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 22일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘의 소감:&amp;nbsp;&lt;/span&gt;&lt;/b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;처음 보는 라이브러리는 역시 어려워 &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  RNN&lt;/span&gt;&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;노이즈를 포함한 sin 곡선 데이터 활용&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779599783658&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import math
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 생성, 전처리&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779606876912&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;np.random.seed(42)
torch.manual_seed(42)

# 노이즈를 포함한 연속성을 가진 sin 곡선 생성
x = torch.arange(3000).float()
y = torch.sin(2 * math.pi * 0.02 * x) + 0.05 * torch.randn(3000)

# train test split
train_size = int(0.8 * len(x))
print(train_size)		# 2400

X_train = y[:train_size]
X_test = y[train_size:]

scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train.reshape(-1, 1))
X_test_sc = scaler.transform(X_test.reshape(-1, 1))
	# reshape를 통해 2차원 array의 형태로 변환

X_train_tensor = torch.tensor(X_train_sc, dtype = torch.float32)
X_test_tensor = torch.tensor(X_test_sc, dtype=torch.float32)

X_train_tensor.shape		# torch.Size([2400, 1])


# Tensor Data를 배치가 존재하는 3차원 데이터로 변환
X_train_tensor = X_train_tensor.unsqueeze(0)
X_test_tensor = X_test_tensor.unsqueeze(0)
X_train_tensor.shape		# torch.Size([1, 2400, 1])&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;DataLoader&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;Pytorch에서 Dataset들을 batch 단위로 꺼내주는 반복자&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;반복 학습 루프에 맞는 형태의 데이터셋을 공급해주는 역할&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1779608651909&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# Custom Dataset 클래스 정의

class WindowDataset(Dataset):
    def __init__(self, _data, _window):
        # _data: 시계열 데이터
        # _window : 구간 설정
        self.data = _data
        self.window = _window
        # 유효 샘플의 개수 계산
        # 입력 데이터: 전체 길이 - window 크기
        self.n = len(self.data) - self.window
    # __len__ 메서드: 데이터셋의 크기를 반환
    # __getitem__ 메서드: 인덱스에 해당하는 샘플을 반환
    # 특수 메서드: DataLoader가 데이터셋에서 데이터를 불러올 때 자동으로 호출
    def __len__(self):
        return self.n

    def __getitem__(self, idx):
        # DataLoader가 입력된 데이터에서 window 길이의 구간(x)과 그 바로 다음 값(y)을 슬라이스해서 가져갈 때 호출하는 함수
        x = self.data[ idx : idx + self.window ]
        y = self.data[ idx + self.window ]
        return x, y&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;DataLoader 활용 데이터 분할&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779610336636&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;window = 10
train_ds = WindowDataset(X_train_tensor.squeeze(0), window)
test_ds = WindowDataset(X_test_tensor.squeeze(0), window)

batch_size = 10
x_list = []
y_list = []

for i in range(len(train_ds)):
    if i == 10:
        break
    x, y = train_ds[i]
    x_list.append(x)
    y_list.append(y)

train_dl = DataLoader(
    train_ds,
    batch_size = 64,
    drop_last=True,
    shuffle=True
)

test_dl = DataLoader(
    test_ds,
    batch_size=64,
    drop_last=True,
    shuffle=False
)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;반복 학습&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779610550319&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;train_loss_list = []
test_loss_list = []

for epoch in range(20):
    model.train()

    running = 0.0
    n_seen = 0

    for x, y in train_dl:
        x = x.float()
        y = y.float()

        # RNN 모델에 입력(x)을 넣어서 예측값(yhat)을 얻는다.
        yhat = model(x)

        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()

        # 미분값 폭주 방지
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()

        running += loss.item() * x.size(0)
        n_seen += x.size(0)
    
    train_loss = running / max(1, n_seen)
    train_loss_list.append(train_loss)
    print(f'Epoch {epoch+1}, Train Loss: {round(train_loss, 8)}')&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;852&quot; data-origin-height=&quot;393&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bBFFqs/dJMb990QdaQ/RljnpCZeYHcrgzpSj4GXI1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bBFFqs/dJMb990QdaQ/RljnpCZeYHcrgzpSj4GXI1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bBFFqs/dJMb990QdaQ/RljnpCZeYHcrgzpSj4GXI1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbBFFqs%2FdJMb990QdaQ%2FRljnpCZeYHcrgzpSj4GXI1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;533&quot; height=&quot;246&quot; data-origin-width=&quot;852&quot; data-origin-height=&quot;393&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;모델 평가&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779610632825&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;model.eval()
preds = []
trues = []

with torch.no_grad():
    for x, y in test_dl:
        x = x.float()
        y = y.float()
        pred = model(x)
        preds.append(pred)
        trues.append(y)
        
preds = torch.cat(preds, 0).squeeze(-1).numpy()
trues = torch.cat(trues, 0).squeeze(-1).numpy()&lt;/code&gt;&lt;/pre&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;시각화&lt;/blockquote&gt;
&lt;pre id=&quot;code_1779610645899&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.figure(figsize=(20, 13))
plt.plot(preds[:1000], label='Predicted')
plt.plot(trues[:1000], label = 'True')
plt.legend()
plt.grid()
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리60.png&quot; data-origin-width=&quot;1617&quot; data-origin-height=&quot;1044&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/qrX9s/dJMcaaFr5Sy/moPOkXM3W4iEL88KZudr6k/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/qrX9s/dJMcaaFr5Sy/moPOkXM3W4iEL88KZudr6k/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/qrX9s/dJMcaaFr5Sy/moPOkXM3W4iEL88KZudr6k/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqrX9s%2FdJMcaaFr5Sy%2FmoPOkXM3W4iEL88KZudr6k%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;470&quot; height=&quot;1044&quot; data-filename=&quot;티스토리60.png&quot; data-origin-width=&quot;1617&quot; data-origin-height=&quot;1044&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;color: #fe6b00; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;7주차 소감&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;잘 버텨오다가 DataLoader에서 조금 무너졌다... 그래 여기까지가 내 배경지식이구나... &lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;참고도서를 읽어서 해당 부분 제대로 복습하고 다음 주 부분 들어가야겠다...! &lt;/p&gt;</description>
      <category>멀티캠퍼스부트캠프</category>
      <category>데이터 분석가 부트캠프</category>
      <category>멀티캠퍼스부트캠프</category>
      <category>부트캠프</category>
      <author>가라어퍼</author>
      <guid isPermaLink="true">https://bbgw-oshoulder.tistory.com/7</guid>
      <comments>https://bbgw-oshoulder.tistory.com/7#entry7comment</comments>
      <pubDate>Sun, 24 May 2026 17:53:46 +0900</pubDate>
    </item>
    <item>
      <title>6주차 Note: 머신러닝</title>
      <link>https://bbgw-oshoulder.tistory.com/6</link>
      <description>&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;KakaoTalk_20260516_180346697.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bQUmmH/dJMcaja5P2P/I0PCGCdf0ZNKUbHiwz5a41/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bQUmmH/dJMcaja5P2P/I0PCGCdf0ZNKUbHiwz5a41/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bQUmmH/dJMcaja5P2P/I0PCGCdf0ZNKUbHiwz5a41/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbQUmmH%2FdJMcaja5P2P%2FI0PCGCdf0ZNKUbHiwz5a41%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;472&quot; height=&quot;252&quot; data-filename=&quot;KakaoTalk_20260516_180346697.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt; ️ 6주차: 5월 11일 ~ 5월 15일&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;멀티캠퍼스&lt;/span&gt; 부트캠프 6주차 요약&lt;/b&gt;✍️&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/11 ] &lt;b&gt;머신러닝&lt;/b&gt;: LinearRegression, DecisionTree&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/12 ] &lt;b&gt;머신러닝&lt;/b&gt;: 다항 회귀, Ridge, Lasso, ElasticNet&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/13 ] &lt;b&gt;머신러닝&lt;/b&gt;: LogisticRegression, SVM&lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/14 ] &lt;b&gt;머신러닝&lt;/b&gt;: Bagging, Boosting&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/15 ] &lt;b&gt;머신러닝&lt;/b&gt;: RandomForest, Pipeline, GridSearchCV&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 11일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘의 소감:&lt;/span&gt;&lt;/b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt; 오후 시간에는 Logistic Regression을 포함한 다양한 분류/회귀 모델의 이론을 학습하였다.&lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;내일이면 해당 부분 코딩을 하게될 것인데, 단순 모델에서 확장된 개념을 실습할 생각하니 기대된다!&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #000000;&quot;&gt;  LinearRegression&lt;/span&gt;&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #333333; text-align: start;&quot;&gt;저번 시간 마지막에 짧게 하고 넘어갔던 LinearRegression&lt;span&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;b&gt;Review&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;boston dataset 활용&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778483894553&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

boston = pd.read_csv('../csv/boston.csv')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;데이터 분할&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778484443900&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X_train, X_test, y_train, y_test = train_test_split(
    boston.drop('Price', axis=1),
    boston['Price'],
    test_size = 0.3,
    random_state = 42
)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;스케일링&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778484767227&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;stdScaler = StandardScaler()
X_train_sc = stdScaler.fit_transform(X_train)
X_test_sc = stdScaler.transform(X_test)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;선형 회귀 모델 생성, 학습, 예측&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778485313048&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;lr = LinearRegression()
lr_inter = LinearRegression(fit_intercept = False)

# 기본형 모델에 스케일링하지 않은 데이터 사용
lr.fit(X_train, y_train)
pred = lr.predict(X_test)
print(r2_score(y_test, pred))		# 0.7112260057484925

# 기본형 모델에 스케일링한 데이터 사용
lr.fit(X_train_sc, y_train)
pred = lr.predict(X_test_sc)
print(r2_score(y_test, pred))		# 0.7112260057484932

# 절편을 사용하지 않는 모델에 스케일링하지 않은 데이터 사용
lr_inter.fit(X_train, y_train)
pred = lr_inter.predict(X_test)
print(r2_score(y_test, pred))		# 0.6662585112262787

# 절편을 사용하지 않는 모델에 스케일링한 데이터 사용
lr_inter.fit(X_train_sc, y_train)
pred = lr_inter.predict(X_test_sc)
print(r2_score(y_test, pred))		# -6.445989223950212&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  DecisionTree&lt;/span&gt;&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #333333; text-align: start;&quot;&gt;저번 시간 마지막에 짧게 하고 넘어갔던 DecisionTree&lt;span&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;b&gt;Review&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #333333; text-align: start;&quot;&gt;iris dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778485597522&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix

iris = pd.read_csv('../csv/iris.csv')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;데이터 분할&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778485673460&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X_train, X_test, y_train, y_test = train_test_split(
    iris.drop('species', axis = 1),
    iris['species'],
    test_size = 0.2,
    random_state = 42,
    stratify = iris['species']
)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;연습 문제&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;1. max_depth 5, 3, 1을 기준으로 3개의 모델을 생성&amp;nbsp;&amp;nbsp; &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;2. score(X_train, y_train) 함수를 이용하여 학습된 데이터에서 정확도를 확인&amp;nbsp;&amp;nbsp; &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;3. X_test를 이용하여 예측 &amp;rarr; f1_score를 이용하여 모델의 성능 확인&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778486131527&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 1)
tree1 = DecisionTreeClassifier(max_depth=1, random_state=42)
tree3 = DecisionTreeClassifier(max_depth=3, random_state=42)
tree5 = DecisionTreeClassifier(max_depth=5, random_state=42)

tree1.fit(X_train, y_train)
tree3.fit(X_train, y_train)
tree5.fit(X_train, y_train)

# 2)
print(tree1.score(X_train, y_train))	# 0.6666666666666666
print(tree3.score(X_train, y_train))	# 0.9833333333333333
print(tree5.score(X_train, y_train))	# 1.0

# 3)
pred1 = tree1.predict(X_test)
pred3 = tree3.predict(X_test)
pred5 = tree5.predict(X_test)

print(f1_score(y_test, pred1, average='micro'))		# 0.6666666666666666
print(f1_score(y_test, pred3, average='micro'))		# 0.9666666666666667
print(f1_score(y_test, pred5, average='micro'))		# 0.9333333333333333&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;min_samples_split 매개변수 사용&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;min_samples_split : 노드를 분할하기 위해 필요한 최소 샘플 개수 (노드의 샘플 수가 이 값보다 적으면 더 이상 분할하지 않음)&lt;/p&gt;
&lt;pre id=&quot;code_1778487775630&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;clf = DecisionTreeClassifier(max_depth=3, min_samples_split=45, random_state=42)
clf.fit(X_train, y_train)
# Pass plain lists to plot_tree: some scikit-learn releases only accept list
# (not pandas Index / ndarray) for feature_names and class_names.
feature_names = list(X_train.columns)
# plot_tree maps class_names onto the fitted classes in sorted order (clf.classes_).
# y_train.unique() yields first-appearance order of the shuffled split, which can
# label nodes with the wrong species, so take the names from the model itself.
class_names = clf.classes_.tolist()

# 해당 매개변수가 없는 tree와 비교
plt.figure(figsize=(30, 30))
plt.subplot(1, 2, 1)
plot_tree(tree3, feature_names=feature_names, class_names=class_names, filled = True)
plt.title('without min_samples_split')
plt.subplot(1, 2, 2)
plot_tree(clf, feature_names=feature_names, class_names=class_names, filled = True)
plt.title('with min_samples_split')
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리43.png&quot; data-origin-width=&quot;2345&quot; data-origin-height=&quot;2352&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/VOi1L/dJMcafmhipF/FDQwxYuA5hcA55RkNlIlDK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/VOi1L/dJMcafmhipF/FDQwxYuA5hcA55RkNlIlDK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/VOi1L/dJMcafmhipF/FDQwxYuA5hcA55RkNlIlDK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FVOi1L%2FdJMcafmhipF%2FFDQwxYuA5hcA55RkNlIlDK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2345&quot; height=&quot;2352&quot; data-filename=&quot;티스토리43.png&quot; data-origin-width=&quot;2345&quot; data-origin-height=&quot;2352&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 12일&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘의 소감:&lt;/span&gt;&lt;/b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt; 이론이 쏟아진다아아아아...  버텨내자아아아... &lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;  다항 회귀&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;cereal dataset 활용&lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;라이브러리, 데이터 로드&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778545809203&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

cereal = pd.read_csv('../data/cereal.csv')
cereal.info()&lt;/code&gt;&lt;/pre&gt;
&lt;h3 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&amp;nbsp;&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;365&quot; data-origin-height=&quot;448&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bkDQoX/dJMcahqR2t8/hhiI2jjPpCpnZ7k5vdjYu0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bkDQoX/dJMcahqR2t8/hhiI2jjPpCpnZ7k5vdjYu0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bkDQoX/dJMcahqR2t8/hhiI2jjPpCpnZ7k5vdjYu0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbkDQoX%2FdJMcahqR2t8%2FhhiI2jjPpCpnZ7k5vdjYu0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;235&quot; height=&quot;288&quot; data-origin-width=&quot;365&quot; data-origin-height=&quot;448&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;데이터 전처리&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778548579033&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;cereal.describe()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1132&quot; data-origin-height=&quot;258&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dVG1by/dJMcacJSh38/JBKxu6e0l2p5NWyrUQL34K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dVG1by/dJMcacJSh38/JBKxu6e0l2p5NWyrUQL34K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dVG1by/dJMcacJSh38/JBKxu6e0l2p5NWyrUQL34K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdVG1by%2FdJMcacJSh38%2FJBKxu6e0l2p5NWyrUQL34K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1132&quot; height=&quot;258&quot; data-origin-width=&quot;1132&quot; data-origin-height=&quot;258&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;최솟값이 음수인 이상치들이 carbo, sugars, potass 세 column에서 관측되었다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이상치 제거&lt;/p&gt;
&lt;pre id=&quot;code_1778548977765&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;flag = (cereal[['carbo', 'sugars', 'potass']] &amp;lt; 0).any(axis = 1)

df = cereal.loc[~flag, ]
df.describe()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1126&quot; data-origin-height=&quot;254&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/sdRoz/dJMcaiiYIJV/Vt2TCzavwvXOjCn8YloYA1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/sdRoz/dJMcaiiYIJV/Vt2TCzavwvXOjCn8YloYA1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/sdRoz/dJMcaiiYIJV/Vt2TCzavwvXOjCn8YloYA1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FsdRoz%2FdJMcaiiYIJV%2FVt2TCzavwvXOjCn8YloYA1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1126&quot; height=&quot;254&quot; data-origin-width=&quot;1126&quot; data-origin-height=&quot;254&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;다항 회귀선 시각화&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;1차 회귀선 시각화&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1778549482052&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;sns.regplot(
    data = df, x = 'sugars', y = 'rating'
)

plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리44.png&quot; data-origin-width=&quot;562&quot; data-origin-height=&quot;432&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/4Eo41/dJMcabjTZ0b/gec1uQvJBCqr6dwHjMKTu1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/4Eo41/dJMcabjTZ0b/gec1uQvJBCqr6dwHjMKTu1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/4Eo41/dJMcabjTZ0b/gec1uQvJBCqr6dwHjMKTu1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F4Eo41%2FdJMcabjTZ0b%2Fgec1uQvJBCqr6dwHjMKTu1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;369&quot; height=&quot;284&quot; data-filename=&quot;티스토리44.png&quot; data-origin-width=&quot;562&quot; data-origin-height=&quot;432&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;2차 회귀선 시각화&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1778550037043&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;sns.regplot(
    data = df, x = 'sugars', y = 'rating', order = 2
)

plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리45.png&quot; data-origin-width=&quot;562&quot; data-origin-height=&quot;432&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/DrxEl/dJMcaaSOtQd/pVpGu50XiUyhYpKIIImqH0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/DrxEl/dJMcaaSOtQd/pVpGu50XiUyhYpKIIImqH0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/DrxEl/dJMcaaSOtQd/pVpGu50XiUyhYpKIIImqH0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FDrxEl%2FdJMcaaSOtQd%2FpVpGu50XiUyhYpKIIImqH0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;357&quot; height=&quot;274&quot; data-filename=&quot;티스토리45.png&quot; data-origin-width=&quot;562&quot; data-origin-height=&quot;432&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;단항회귀 vs 다항회귀 결과 비교&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778550371658&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# column 하나만 선택하기 위해 상관계수 확인

df.iloc[:, 3:].corr().iloc[-1, ]&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;298&quot; data-origin-height=&quot;283&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/947Rl/dJMcaaSOtZb/skYEYYSZ03zitJ7aGAWhE1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/947Rl/dJMcaaSOtZb/skYEYYSZ03zitJ7aGAWhE1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/947Rl/dJMcaaSOtZb/skYEYYSZ03zitJ7aGAWhE1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F947Rl%2FdJMcaaSOtZb%2FskYEYYSZ03zitJ7aGAWhE1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;194&quot; height=&quot;184&quot; data-origin-width=&quot;298&quot; data-origin-height=&quot;283&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;단항회귀&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1778550447451&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;x = df[['sugars']]
y = df['rating']

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)

lr = LinearRegression()
lr.fit(X_train, y_train)

pred = lr.predict(X_test)
print(mean_absolute_error(y_test, pred))
# 7.469662633513549&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;2차항(다항) 회귀&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1778551215300&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;poly_reg = PolynomialFeatures()

X_train_poly = poly_reg.fit_transform(X_train)
X_test_poly = poly_reg.transform(X_test)

lr.fit(X_train_poly, y_train)
pred_2 = lr.predict(X_test_poly)

print(mean_absolute_error(y_test, pred_2))
# 6.203734788707707&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;문자열로 이루어진 데이터 컬럼은 모두 제외하고, 단항회귀 vs 다항회귀 MAE 값 비교&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1778552141877&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 단항회귀

df1 = df.select_dtypes('number')
X = df1.drop('rating', axis=1)
y = df1['rating']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

lr.fit(X_train, y_train)

pred_4 = lr.predict(X_test)

print('MAE:', mean_absolute_error(y_test, pred_4))
print('R2 score:', r2_score(y_test, pred_4))
print()
print('회귀계수:', lr.coef_, ', 절편:', lr.intercept_)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;716&quot; data-origin-height=&quot;126&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/crwFLz/dJMcaicfTkg/6qC8vfhljCqSFaXj9OZ4wk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/crwFLz/dJMcaicfTkg/6qC8vfhljCqSFaXj9OZ4wk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/crwFLz/dJMcaicfTkg/6qC8vfhljCqSFaXj9OZ4wk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcrwFLz%2FdJMcaicfTkg%2F6qC8vfhljCqSFaXj9OZ4wk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;563&quot; height=&quot;99&quot; data-origin-width=&quot;716&quot; data-origin-height=&quot;126&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;pre id=&quot;code_1778552195275&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 다항회귀

X_train_poly = poly_reg.fit_transform(X_train)
X_test_poly = poly_reg.transform(X_test)

lr.fit(X_train_poly, y_train)

pred_5 = lr.predict(X_test_poly)

print('MAE:', mean_absolute_error(y_test, pred_5))
print('R2 score:', r2_score(y_test, pred_5))
print()
print('회귀계수:', lr.coef_, ', 절편:', lr.intercept_)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;731&quot; data-origin-height=&quot;510&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bKnrKi/dJMb990GIZY/5zwtkiRNcAEudZ7XWxZRFk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bKnrKi/dJMb990GIZY/5zwtkiRNcAEudZ7XWxZRFk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bKnrKi/dJMb990GIZY/5zwtkiRNcAEudZ7XWxZRFk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbKnrKi%2FdJMb990GIZY%2F5zwtkiRNcAEudZ7XWxZRFk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;562&quot; height=&quot;392&quot; data-origin-width=&quot;731&quot; data-origin-height=&quot;510&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;  Ridge&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;diabetes dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1778565854620&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt

diabetes = load_diabetes()
df = pd.DataFrame(diabetes['data'], columns= diabetes['feature_names'])
df.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;813&quot; data-origin-height=&quot;175&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/QoEua/dJMcaakW7nV/bnNStiw3YnOSGVj1qy7ggk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/QoEua/dJMcaakW7nV/bnNStiw3YnOSGVj1qy7ggk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/QoEua/dJMcaakW7nV/bnNStiw3YnOSGVj1qy7ggk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FQoEua%2FdJMcaakW7nV%2FbnNStiw3YnOSGVj1qy7ggk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;813&quot; height=&quot;175&quot; data-origin-width=&quot;813&quot; data-origin-height=&quot;175&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1778566066107&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;alphas = np.logspace(-3, 1, 5)
alphas			# array([1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01])

data = []

for a in alphas:
    # 모델 생성
    ridge = Ridge(alpha = a)
    ridge.fit(diabetes['data'], diabetes['target'])
    # 학습된 모델의 회귀계수를 data 리스트에 추가
    data.append(
        ridge.coef_
    )

df_ridge = pd.DataFrame(data, index=alphas, columns = df.columns)
df_ridge&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;944&quot; data-origin-height=&quot;174&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cbefGS/dJMcaicggiR/kkZyvCk2q2X4Ssr7kz533k/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cbefGS/dJMcaicggiR/kkZyvCk2q2X4Ssr7kz533k/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cbefGS/dJMcaicggiR/kkZyvCk2q2X4Ssr7kz533k/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcbefGS%2FdJMcaicggiR%2FkkZyvCk2q2X4Ssr7kz533k%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;944&quot; height=&quot;174&quot; data-origin-width=&quot;944&quot; data-origin-height=&quot;174&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1778566154207&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.figure(figsize=(16, 15))

plt.axhline(y=0, linestyle='--', linewidth=2, color='black')

plt.plot(df_ridge.loc[0.001, :], '^-', color=&quot;#ffb8b8&quot;)
plt.plot(df_ridge.loc[0.01, :], 'o-', color=&quot;#ff9191&quot;)
plt.plot(df_ridge.loc[0.1, :], 'v-', color=&quot;#ff6060&quot;)
plt.plot(df_ridge.loc[1.0, :], '*-', color=&quot;#ff3434&quot;)
plt.plot(df_ridge.loc[10.0, :], 's-', color=&quot;#d40000&quot;)

plt.legend(['center', 0.001, 0.01, 0.1, 1, 10], bbox_to_anchor = (1, 1))
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리46.png&quot; data-origin-width=&quot;1307&quot; data-origin-height=&quot;1198&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cC4zAm/dJMb99M5xCo/FAeM3vBs7Ba6bNz1zQrfD1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cC4zAm/dJMb99M5xCo/FAeM3vBs7Ba6bNz1zQrfD1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cC4zAm/dJMb99M5xCo/FAeM3vBs7Ba6bNz1zQrfD1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcC4zAm%2FdJMb99M5xCo%2FFAeM3vBs7Ba6bNz1zQrfD1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1307&quot; height=&quot;1198&quot; data-filename=&quot;티스토리46.png&quot; data-origin-width=&quot;1307&quot; data-origin-height=&quot;1198&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1778574337642&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X_train, X_test, y_train, y_test = train_test_split(
    diabetes['data'], diabetes['target'], test_size=0.2, random_state=42
    )

lr = LinearRegression()
lr.fit(X_train, y_train)
pred = lr.predict(X_test)
print(round(r2_score(y_test, pred), 4))		# 0.4526

for a in [0.01, 0.1, 1]:
    ridge = Ridge(alpha = a)
    ridge.fit(X_train, y_train)
    pred2 = ridge.predict(X_test)
    print(f'alpha = {a} / MAE = {round(mean_absolute_error(y_test, pred2), 4)} / r&amp;sup2; = {round(r2_score(y_test, pred2), 4)}')

# alpha = 0.01 / MAE = 42.8369 / r&amp;sup2; = 0.456
# alpha = 0.1 / MAE = 42.9969 / r&amp;sup2; = 0.4609
# alpha = 1 / MAE = 46.1389 / r&amp;sup2; = 0.4192&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
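&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;alpha가 0.01, 0.1일 때는 미세하지만 Ridge의 R2 score(0.456, 0.4609)가 LinearRegression(0.4526)보다 더 좋은 것을 확인할 수 있다. 반면 alpha가 1일 때는 0.4192로 오히려 낮아진다.&lt;/span&gt;&lt;/p&gt;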
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;  Lasso&lt;/b&gt;&lt;/h3&gt;
&lt;pre id=&quot;code_1778574405363&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sklearn.linear_model import Lasso&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1778574439671&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;lasso_1 = Lasso(alpha = 0.01)
lasso_2 = Lasso(alpha = 0.1)
lasso_3 = Lasso(alpha = 1)

# Ridge에서 분할한 데이터 사용
lasso_1.fit(X_train, y_train)
lasso_2.fit(X_train, y_train)
lasso_3.fit(X_train, y_train)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1778574788314&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;pred1 = lasso_1.predict(X_test)
pred2 = lasso_2.predict(X_test)
pred3 = lasso_3.predict(X_test)

print(mean_absolute_error(y_test, pred1))		# 42.83184707336087
print(mean_absolute_error(y_test, pred2))		# 42.85442771664998
print(mean_absolute_error(y_test, pred3))		# 49.73032753662261

print(r2_score(y_test, pred1))				# 0.4566861194580625
print(r2_score(y_test, pred2))				# 0.4718547867276227
print(r2_score(y_test, pred3))				# 0.3575918767219113&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;  ElasticNet&lt;/b&gt;&lt;/h3&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;함수 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778574909284&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sklearn.linear_model import ElasticNet&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;모델 호출, 학습, 예측, 평가&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778574977013&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;ela_1 = ElasticNet(alpha=0.01)
ela_2 = ElasticNet(alpha=0.1)
ela_3 = ElasticNet(alpha=1)

ela_1.fit(X_train, y_train)
ela_2.fit(X_train, y_train)
ela_3.fit(X_train, y_train)

pred_1 = ela_1.predict(X_test)
pred_2 = ela_2.predict(X_test)
pred_3 = ela_3.predict(X_test)

print(r2_score(y_test, pred_1))		# 0.37364841571505814
print(r2_score(y_test, pred_2))		# 0.09865421116113748
print(r2_score(y_test, pred_3))		# -0.0024652131111431164&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;color: #dddddd;&quot;&gt;&lt;s&gt;&lt;i&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;처참한 결과값&lt;/span&gt;&lt;/i&gt;&lt;/s&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 13일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘의 소감:&lt;/span&gt;&lt;/b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt; SVM을 ADsP나 빅분기에서는 꽤 깊게 다뤘던 것 같은데, 학교 실습으로는 해본 적 없는 부분이라 의문이 있었다.&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘 해보니 더욱 잘 알겠다... 결론은&amp;nbsp;&lt;span style=&quot;color: #dddddd;&quot;&gt;&lt;s&gt;&lt;i&gt;&lt;b&gt;앙상블 쓰자 랜덤 포레스트 최고&lt;/b&gt;&lt;/i&gt;&lt;/s&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  Logistic Regression&lt;/span&gt;&lt;/b&gt;&lt;/h3&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778632897841&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

body = pd.read_csv('../data/bodyPerformance.csv')
body.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1062&quot; data-origin-height=&quot;178&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cpMXRg/dJMcahqSWq9/KPLyU2GhW50kmtnUCKqYBk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cpMXRg/dJMcahqSWq9/KPLyU2GhW50kmtnUCKqYBk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cpMXRg/dJMcahqSWq9/KPLyU2GhW50kmtnUCKqYBk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcpMXRg%2FdJMcahqSWq9%2FKPLyU2GhW50kmtnUCKqYBk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1062&quot; height=&quot;178&quot; data-origin-width=&quot;1062&quot; data-origin-height=&quot;178&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 전처리: 문자 형태 변환&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;문자 형태인 gender column을 숫자 형태로 변환&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778634056089&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;body['gender'].value_counts()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;219&quot; data-origin-height=&quot;79&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ba9PTN/dJMcad20PUz/L6i4tOJsQ4Eudc4EM3suXK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ba9PTN/dJMcad20PUz/L6i4tOJsQ4Eudc4EM3suXK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ba9PTN/dJMcad20PUz/L6i4tOJsQ4Eudc4EM3suXK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fba9PTN%2FdJMcad20PUz%2FL6i4tOJsQ4Eudc4EM3suXK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;219&quot; height=&quot;79&quot; data-origin-width=&quot;219&quot; data-origin-height=&quot;79&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;변환하는 5가지 방법&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778635264789&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# map 활용 1
body['gender'].map(
    lambda x : 0 if x == 'M' else 1
)

# map 활용 2
body['gender'].map(
    {
        'M': 0,
        'F': 1
    }
)

# replace
body['gender'].replace('M', 0).replace('F', 1)

# numpy
np.where(body['gender'] == 'M', 0, 1)

# get_dummies
df = pd.get_dummies(body, columns = ['gender'], drop_first = True)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;일부러 데이터 불균형 만들기&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;class에서&amp;nbsp;A는&amp;nbsp;1&amp;nbsp;그&amp;nbsp;외의&amp;nbsp;값들은&amp;nbsp;0으로&amp;nbsp;변환&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778636037563&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df['class_1'] = np.where( df['class'] == 'A', 1, 0 )
df['class_1'].value_counts()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;221&quot; data-origin-height=&quot;87&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bac8l7/dJMcafmizaR/hFIjiXx1UCjuzXuyvKAbV0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bac8l7/dJMcafmizaR/hFIjiXx1UCjuzXuyvKAbV0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bac8l7/dJMcafmizaR/hFIjiXx1UCjuzXuyvKAbV0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fbac8l7%2FdJMcafmizaR%2FhFIjiXx1UCjuzXuyvKAbV0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;221&quot; height=&quot;87&quot; data-origin-width=&quot;221&quot; data-origin-height=&quot;87&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;데이터 분할&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778636443733&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 독립 변수 종속 변수 데이터 분할
X = df.drop(['class', 'class_1'], axis=1)
y = df['class_1']	# 불균형이 있는 컬럼 활용


# train test 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.3, random_state = 42, stratify = y
)
	# 데이터가 불균형하므로 stratify 사용&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;모델 생성, 학습, 예측, 평가&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778636849246&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 모델 생성
logR = LogisticRegression()

# 모델 학습
logR.fit(X_train, y_train)

# 예측
pred = logR.predict(X_test)

# 평가
cm = confusion_matrix(y_test, pred)
acc = accuracy_score(y_test, pred)
prc = precision_score(y_test, pred)
rcll = recall_score(y_test, pred)
f1 = f1_score(y_test, pred)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;혼동 행렬 시각화&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778639520942&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.figure(figsize = (7, 5))

sns.heatmap(
    cm, annot = True, cmap = 'Blues', fmt = 'd',
    xticklabels = ['pred Negative', 'pred Positive'],
    yticklabels = ['actual Negative', 'actual Positive']
)

plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리48.png&quot; data-origin-width=&quot;568&quot; data-origin-height=&quot;428&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/A7Hop/dJMcagFwEBh/yYZyTbtuJZQcTKZVPwiOnK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/A7Hop/dJMcagFwEBh/yYZyTbtuJZQcTKZVPwiOnK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/A7Hop/dJMcagFwEBh/yYZyTbtuJZQcTKZVPwiOnK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FA7Hop%2FdJMcagFwEBh%2FyYZyTbtuJZQcTKZVPwiOnK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;437&quot; height=&quot;329&quot; data-filename=&quot;티스토리48.png&quot; data-origin-width=&quot;568&quot; data-origin-height=&quot;428&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;평가 지표 수치로 확인&lt;/p&gt;
&lt;pre id=&quot;code_1778639663916&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;print('정확도: ', round(acc, 2))
print('정밀도: ', round(prc, 2))
print('재현율: ', round(rcll, 2))
print('f1 score: ', round(f1, 2))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;185&quot; data-origin-height=&quot;89&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cUCUNc/dJMcacQB5hg/MExnwWSv1aYfXlN3KQGqYk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cUCUNc/dJMcacQB5hg/MExnwWSv1aYfXlN3KQGqYk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cUCUNc/dJMcacQB5hg/MExnwWSv1aYfXlN3KQGqYk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcUCUNc%2FdJMcacQB5hg%2FMExnwWSv1aYfXlN3KQGqYk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;185&quot; height=&quot;89&quot; data-origin-width=&quot;185&quot; data-origin-height=&quot;89&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;정확도는 높은 반면, 클래스 데이터의 불균형으로 인해 정밀도, 재현율, f1 score는 낮은 값을 보이는 것을 확인할 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style7&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;클래스별 예측 확률과 decision_function 확인, 시각화&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;번외로, 0(Not A)과 1(A)로 예측할 확률을 보여주는 predict_proba와, 분류에 대한 확신의 점수를 보여주는 decision_function을 활용&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778636877784&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 예측 확률 데이터 확인
proba = pd.DataFrame(logR.predict_proba(X_train))

# 클래스별 점수 (분류에 대한 확신의 점수)
cs = pd.DataFrame(logR.decision_function(X_train))

# proba, cs 데이터 프레임을 단순 열결합
df2 = pd.concat([proba, cs], axis=1)
df2.columns = ['predict proba of Not A', 'predict proba of A', 'decision_function']
df2&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;508&quot; data-origin-height=&quot;365&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dkCwhe/dJMcaiDiiA6/Q9gAEvErvcJkWJPUrdtxMK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dkCwhe/dJMcaiDiiA6/Q9gAEvErvcJkWJPUrdtxMK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dkCwhe/dJMcaiDiiA6/Q9gAEvErvcJkWJPUrdtxMK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdkCwhe%2FdJMcaiDiiA6%2FQ9gAEvErvcJkWJPUrdtxMK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;381&quot; height=&quot;274&quot; data-origin-width=&quot;508&quot; data-origin-height=&quot;365&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;시각화&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778637353110&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 그래프로 만들었을 때 보기 편하도록 정렬
df2.sort_values('decision_function', inplace = True)
df2.reset_index(drop = True, inplace = True)

# 시각화
plt.figure(figsize = (16, 8))
plt.axhline(y = 0.5, linestyle = '--', linewidth = 2, color = 'black', alpha = 0.3)
plt.axvline(x = 0, linestyle = '--', linewidth = 2, color = 'black', alpha = 0.3)
plt.plot(df2['decision_function'], df2['predict proba of Not A'], '--', label = 'Not A', color = &quot;#0400ff&quot;)
plt.plot(df2['decision_function'], df2['predict proba of A'], '--', label = 'A', color = &quot;#ff0000&quot;)
plt.xlabel('Decision Function')
plt.ylabel('Predict Probability')
plt.legend()
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리47.png&quot; data-origin-width=&quot;1311&quot; data-origin-height=&quot;679&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bI3mGC/dJMcahYGO1G/Jc2r1OBIh2ZKsKzjfN5CW0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bI3mGC/dJMcahYGO1G/Jc2r1OBIh2ZKsKzjfN5CW0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bI3mGC/dJMcahYGO1G/Jc2r1OBIh2ZKsKzjfN5CW0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbI3mGC%2FdJMcahYGO1G%2FJc2r1OBIh2ZKsKzjfN5CW0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1311&quot; height=&quot;679&quot; data-filename=&quot;티스토리47.png&quot; data-origin-width=&quot;1311&quot; data-origin-height=&quot;679&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 불균형 해결&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;1. 샘플링 기법을 이용하여 데이터의 균형을 맞춘다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;2. 모델 생성 시 불균형한 데이터들에 가중치를 특별 부여&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이 중 2번 방법으로 불균형 해결&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778639918162&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;logR2 = LogisticRegression(class_weight='balanced')&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;train, test 분할 시 이미 계층화 작업을 수행했으므로 해당 작업은 다시 하지 않는다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778640021280&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;logR2.fit(X_train, y_train)
pred_2 = logR2.predict(X_test)

acc = accuracy_score(y_test, pred_2)
prc = precision_score(y_test, pred_2)
rcll = recall_score(y_test, pred_2)
f1 = f1_score(y_test, pred_2)

print('정확도: ', round(acc, 2))
print('정밀도: ', round(prc, 2))
print('재현율: ', round(rcll, 2))
print('f1 score: ', round(f1, 2))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;149&quot; data-origin-height=&quot;95&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cLcyOb/dJMcahxD82b/oA6t3TBay04zt8ogUpC9Xk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cLcyOb/dJMcahxD82b/oA6t3TBay04zt8ogUpC9Xk/img.png&quot; data-alt=&quot;class_weight 사용 후 평가 지표&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cLcyOb/dJMcahxD82b/oA6t3TBay04zt8ogUpC9Xk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcLcyOb%2FdJMcahxD82b%2FoA6t3TBay04zt8ogUpC9Xk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;181&quot; height=&quot;115&quot; data-origin-width=&quot;149&quot; data-origin-height=&quot;95&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;class_weight 사용 후 평가 지표&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;135&quot; data-origin-height=&quot;88&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cjoB5P/dJMcaiQN1V3/qITCgQO4Ye09jHRxb0Y3dK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cjoB5P/dJMcaiQN1V3/qITCgQO4Ye09jHRxb0Y3dK/img.png&quot; data-alt=&quot;class_weight 사용 전 평가 지표&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cjoB5P/dJMcaiQN1V3/qITCgQO4Ye09jHRxb0Y3dK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcjoB5P%2FdJMcaiQN1V3%2FqITCgQO4Ye09jHRxb0Y3dK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;184&quot; height=&quot;120&quot; data-origin-width=&quot;135&quot; data-origin-height=&quot;88&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;class_weight 사용 전 평가 지표&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;정확도와 정밀도는 감소했지만, 재현율과 f1 score는 증가한 것을 볼 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;데이터 전처리: 이상치 제거&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 7일에 만들어놓은 outlier_iqr2 함수를 outlier.py 파일로 모듈화하여 outlier_iqr라는 이름으로 불러와 진행&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;outlier 모듈 코드&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778645472553&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import numpy as np

# Flag outliers column by column with the IQR rule, then either drop the
# flagged rows or cap the flagged values at the computed bounds.
#
# Parameters:
#   data  : table to process (presumably a pandas DataFrame, since .copy() and
#           .loc are used); it is copied first, so the caller's object is not
#           modified.
#   *cols : names of the columns to check, handled in the order given.
#   n     : multiplier applied to the IQR when computing the bounds (default 1.5).
#   drop  : if True, rows outside the bounds are removed; otherwise the
#           out-of-range values are replaced with the nearest bound.
#
# Returns:
#   (df, whis_dict): the processed copy, and a dict mapping each column name to
#   the rows flagged as outliers for that column.
def outlier_iqr(data, *cols, n = 1.5, drop = False):
    df = data.copy()
    whis_dict = {}

    for col in cols:
        try:
            # 25th and 75th percentiles of the column as it currently stands in df.
            q_1, q_3 = np.percentile(df[col], [25, 75])
            iqr = q_3 - q_1

            # Bounds: n times the IQR above the upper quartile / below the lower quartile.
            upper_whis = q_3 + (n * iqr)
            lower_whis = q_1 - (n * iqr)

            print(f'''
                지정된 컬럼의 이름: {col},
                상단 경계: {upper_whis},
                하단 경계 {lower_whis}''')

            # Boolean masks for values beyond each bound, and how many rows each one selects.
            upper_flag = df[col] &amp;gt; upper_whis
            lower_flag = df[col] &amp;lt; lower_whis
            upper_n = len( df.loc[upper_flag, ] )
            lower_n = len( df.loc[lower_flag, ] )
            print(f'상단 경계를 벗어나는 데이터의 개수: {upper_n}, 하단 경계를 벗어나는 데이터의 개수: {lower_n}')
            # Record the flagged rows before df is changed below.
            whis_df = df.loc[upper_flag|lower_flag, ]
            whis_dict[col] = whis_df

            if drop:
                # df is reassigned, so the following columns are evaluated on the
                # already-filtered rows.
                df = df.loc[ ~(upper_flag | lower_flag), ]
            else:
                # Cap the out-of-range values at the bounds.
                # NOTE(review): the bounds are computed as floats; writing them into an
                # integer column may warn or fail depending on the pandas version - confirm.
                df.loc[upper_flag, col] = upper_whis
                df.loc[lower_flag, col] = lower_whis

        # Any failure while handling a column is only printed; the loop then moves
        # on to the next column.
        except Exception as e:
            print(f'Error: {e}')
    
    return df, whis_dict&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;함수 로드&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778645528251&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from outlier import outlier_iqr&lt;/code&gt;&lt;/pre&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;이상치 제거&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778645870276&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;outlier_drop_df, outlier_dict = outlier_iqr(df, *df.drop('class_1', axis=1).columns, drop = True)
print(len(body), len(outlier_drop_df))		# 13393 12634&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;분할, 학습, 예측, 평가&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778646083350&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X_train, X_test, y_train, y_test = train_test_split(
    outlier_drop_df.drop(['class', 'class_1'], axis=1),
    outlier_drop_df['class_1'],
    test_size = 0.3,
    stratify = outlier_drop_df['class_1']
)

logR2.fit(X_train, y_train)
pred_3 = logR2.predict(X_test)

acc = accuracy_score(y_test, pred_3)
prc = precision_score(y_test, pred_3)
rcll = recall_score(y_test, pred_3)
f1 = f1_score(y_test, pred_3)

print('정확도: ', round(acc, 2))
print('정밀도: ', round(prc, 2))
print('재현율: ', round(rcll, 2))
print('f1 score: ', round(f1, 2))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;182&quot; data-origin-height=&quot;87&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dfQj3Y/dJMcac38EGA/0qKrEuzdtdbMR0ieQd0np1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dfQj3Y/dJMcac38EGA/0qKrEuzdtdbMR0ieQd0np1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dfQj3Y/dJMcac38EGA/0qKrEuzdtdbMR0ieQd0np1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdfQj3Y%2FdJMcac38EGA%2F0qKrEuzdtdbMR0ieQd0np1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;182&quot; height=&quot;87&quot; data-origin-width=&quot;182&quot; data-origin-height=&quot;87&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;데이터 전처리: 샘플링&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;RandomUnderSampler, SMOTE 활용&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;필요한 부분 import&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778647329715&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;분석하기 전에 매번 같은 코드를 작성하므로 해당 부분 함수로 생성&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;적합~평가 함수 생성&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778647052911&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;def logR_function(x, y, weight = None):
    # Split x/y into train and test sets, fit a logistic regression on the
    # training part, print evaluation metrics for the test part, and return the
    # test predictions together with the true test labels.
    #
    # Parameters:
    #   x, y   : features and labels handed to train_test_split.
    #   weight : forwarded as LogisticRegression(class_weight=...); default None.
    #
    # Returns:
    #   (pred, y_test): predictions for the held-out split and its true labels.
    #
    # train_test_split, LogisticRegression and the metric functions are not
    # imported here; they must already be in scope where this is defined.

    # 30% of the rows are held out, stratified on y, with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size = 0.3, random_state = 42, stratify = y
    )

    model = LogisticRegression(class_weight = weight)

    model.fit(X_train, y_train)

    pred = model.predict(X_test)

    # Precision, recall and f1 use average='macro', so the same call works for
    # both binary and multi-class labels.
    acc = accuracy_score(y_test, pred)
    prc = precision_score(y_test, pred, average = 'macro')
    rcll = recall_score(y_test, pred, average = 'macro')
    f1 = f1_score(y_test, pred, average = 'macro')

    # Printed values are rounded to two decimals.
    print('정확도: ', round(acc, 2))
    print('정밀도: ', round(prc, 2))
    print('재현율: ', round(rcll, 2))
    print('f1 score: ', round(f1, 2))
    return pred, y_test&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;RandomUnderSampler 활용&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778647267820&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;undersample = RandomUnderSampler(sampling_strategy = 1)
    # 소수 클래스와 다수 클래스의 숫자를 똑같이 맞춤: 약 6700개의 데이터를 날림

X = df.drop(['class', 'class_1'], axis = 1)
y = df['class_1']

X_under, y_under = undersample.fit_resample(X, y)

under_pred, y_test = logR_function(X_under, y_under)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;155&quot; data-origin-height=&quot;84&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/wFNeC/dJMcabYvryK/zF2xPyNzc464LAk2X8sfnk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/wFNeC/dJMcabYvryK/zF2xPyNzc464LAk2X8sfnk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/wFNeC/dJMcabYvryK/zF2xPyNzc464LAk2X8sfnk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FwFNeC%2FdJMcabYvryK%2FzF2xPyNzc464LAk2X8sfnk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;155&quot; height=&quot;84&quot; data-origin-width=&quot;155&quot; data-origin-height=&quot;84&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;SMOTE 활용&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778647433971&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;smote = SMOTE(sampling_strategy=1)

X_over, y_over = smote.fit_resample(X, y)

over_pred, y_test = logR_function(X_over, y_over)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;155&quot; data-origin-height=&quot;92&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ey2qaU/dJMcacQChde/BlxM4LhxR2fYTBVwQkIH1k/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ey2qaU/dJMcacQChde/BlxM4LhxR2fYTBVwQkIH1k/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ey2qaU/dJMcacQChde/BlxM4LhxR2fYTBVwQkIH1k/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fey2qaU%2FdJMcacQChde%2FBlxM4LhxR2fYTBVwQkIH1k%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;155&quot; height=&quot;92&quot; data-origin-width=&quot;155&quot; data-origin-height=&quot;92&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;연습 문제&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;1.&amp;nbsp;body&amp;nbsp;데이터에서&amp;nbsp;성별&amp;nbsp;column을&amp;nbsp;0,&amp;nbsp;1로&amp;nbsp;변환 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;2.&amp;nbsp;class&amp;nbsp;컬럼의&amp;nbsp;데이터:&amp;nbsp;A는&amp;nbsp;1,&amp;nbsp;B는&amp;nbsp;2,&amp;nbsp;C는&amp;nbsp;3,&amp;nbsp;D는&amp;nbsp;4로&amp;nbsp;변경 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;3.&amp;nbsp;극단치&amp;nbsp;데이터를&amp;nbsp;경계값들로&amp;nbsp;대체 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;4.&amp;nbsp;로지스틱&amp;nbsp;회귀를&amp;nbsp;이용하여&amp;nbsp;혼동행렬,&amp;nbsp;나머지&amp;nbsp;성능&amp;nbsp;지표를&amp;nbsp;확인&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778650004478&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;body = pd.read_csv('../data/bodyPerformance.csv')

# 1)
body['gender'] = body['gender'].map(lambda x : 0 if x == 'M' else 1)

# 2)
body['class'] = body['class'].map( {'A': 1, 'B': 2, 'C': 3, 'D': 4} )

# 3)
outlier_replace_df, outlier_dict2 = outlier_iqr(body, *body.drop('class', axis=1).columns)

# 4)
X = outlier_replace_df.drop(['class'], axis = 1)
y = outlier_replace_df['class']

pred, y_test = logR_function(X, y)
print()
print(confusion_matrix(y_test, pred))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;247&quot; data-origin-height=&quot;178&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bgFze6/dJMcahdk6Sq/QpDTf7kMiLQS61fskn3VrK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bgFze6/dJMcahdk6Sq/QpDTf7kMiLQS61fskn3VrK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bgFze6/dJMcahdk6Sq/QpDTf7kMiLQS61fskn3VrK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbgFze6%2FdJMcahdk6Sq%2FQpDTf7kMiLQS61fskn3VrK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;179&quot; height=&quot;129&quot; data-origin-width=&quot;247&quot; data-origin-height=&quot;178&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;혼동 행렬 시각화&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778651405489&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.figure(figsize=(7,5))

sns.heatmap(cm, annot=True, cmap='Blues', fmt='d', xticklabels=[1,2,3,4], yticklabels=[1,2,3,4])
plt.ylabel('Actual Class')
plt.xlabel('Pred Class')
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리49.png&quot; data-origin-width=&quot;579&quot; data-origin-height=&quot;448&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/QRkkB/dJMcahdk8KG/vFvQSb1QK5agYfA3Y99kz0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/QRkkB/dJMcahdk8KG/vFvQSb1QK5agYfA3Y99kz0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/QRkkB/dJMcahdk8KG/vFvQSb1QK5agYfA3Y99kz0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FQRkkB%2FdJMcahdk8KG%2FvFvQSb1QK5agYfA3Y99kz0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;579&quot; height=&quot;448&quot; data-filename=&quot;티스토리49.png&quot; data-origin-width=&quot;579&quot; data-origin-height=&quot;448&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;  Support Vector Machine (SVM)&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778659385307&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

df = pd.read_csv('../data/classification.csv')
df.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;272&quot; data-origin-height=&quot;177&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/vs8uB/dJMcaffy3c3/mVMIEcCRtaBHSILanksUiK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/vs8uB/dJMcaffy3c3/mVMIEcCRtaBHSILanksUiK/img.png&quot; data-alt=&quot;총 297행의 데이터&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/vs8uB/dJMcaffy3c3/mVMIEcCRtaBHSILanksUiK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fvs8uB%2FdJMcaffy3c3%2FmVMIEcCRtaBHSILanksUiK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;272&quot; height=&quot;177&quot; data-origin-width=&quot;272&quot; data-origin-height=&quot;177&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;총 297행의 데이터&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  SVC: SVM으로 분류 작업을 할 때&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 클래스의 분포를 그래프로 확인&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778659524515&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;sns.pairplot(
    data = df,
    hue = 'success'
)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리50.png&quot; data-origin-width=&quot;572&quot; data-origin-height=&quot;496&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/biLbH0/dJMcafs2ODz/hm9ZKrq2EIqw8XBukWXlG0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/biLbH0/dJMcafs2ODz/hm9ZKrq2EIqw8XBukWXlG0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/biLbH0/dJMcafs2ODz/hm9ZKrq2EIqw8XBukWXlG0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbiLbH0%2FdJMcafs2ODz%2Fhm9ZKrq2EIqw8XBukWXlG0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;366&quot; height=&quot;317&quot; data-filename=&quot;티스토리50.png&quot; data-origin-width=&quot;572&quot; data-origin-height=&quot;496&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;SVC 모델 학습, 예측, 평가&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778659892519&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = df.drop('success', axis = 1)
y = df['success']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

svc = SVC()
svc_2 = SVC(C=0.1)

svc.fit(X_train, y_train)
svc_2.fit(X_train, y_train)

pred = svc.predict(X_test)
pred2 = svc_2.predict(X_test)


acc = accuracy_score(y_test, pred)
acc_2 = accuracy_score(y_test, pred2)

prc = precision_score(y_test, pred)
prc_2 = precision_score(y_test, pred2)

rcll = recall_score(y_test, pred)
rcll_2 = recall_score(y_test, pred2)

f1 = f1_score(y_test, pred)
f1_2 = f1_score(y_test, pred2)

print('정확도: ', round(acc, 4), round(acc_2, 4))
print('정밀도: ', round(prc, 4), round(prc_2, 4))
print('재현율: ', round(rcll, 4), round(rcll_2, 4))
print('f1 score: ', round(f1, 4), round(f1_2, 4))

# 정확도:  0.9 0.9
# 정밀도:  0.9821 0.9821
# 재현율:  0.873 0.873
# f1 score:  0.9244 0.9244&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;뭔가 잘못됐다... 왜 평가지표가 똑같이 나오는 거야 &lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;근데 왜 똑같이 나오는지 모르겠다... 어디서 잘못된 거냐...&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  &lt;b&gt;SVR: SVM으로 회귀 작업을 할 때&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;추가 라이브러리 로드, 데이터 생성&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778660295429&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;가지고 있는 데이터로 SVR을 돌리면 시간이 너무 오래 걸려서, 간단한 데이터셋을 만들기로 했다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778660400669&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;x = np.sort(5 * np.random.rand(40, 1), axis = 0)
y = np.sin(x).ravel()
    # ravel: 1차원으로 변경

# 노이즈 추가
y[::5] += 3 * (0.5 - np.random.rand(8))&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;모델 학습, 예측, 평가&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778660449200&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;svr_rbf.fit(x, y)
svr_lin.fit(x, y)
svr_poly.fit(x, y)

pred_rbf = svr_rbf.predict(x)
pred_lin = svr_lin.predict(x)
pred_poly = svr_poly.predict(x)

index = ['RBF', 'Linear', 'Poly']
cols = ['MSE', 'R2']

result = pd.DataFrame(index = index, columns = cols)
preds = [pred_rbf, pred_lin, pred_poly]

for pred, idx in zip(preds, index):
    mse = mean_squared_error(y, pred)
    r2 = r2_score(y, pred)

    result.loc[idx, 'MSE'] = round(mse, 2)
    result.loc[idx, 'R2'] = round(r2*100, 2)

result&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;157&quot; data-origin-height=&quot;117&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/uXGOp/dJMcai4jYPv/HWKAjbOkuk9TNC7iAO9ohk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/uXGOp/dJMcai4jYPv/HWKAjbOkuk9TNC7iAO9ohk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/uXGOp/dJMcai4jYPv/HWKAjbOkuk9TNC7iAO9ohk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FuXGOp%2FdJMcai4jYPv%2FHWKAjbOkuk9TNC7iAO9ohk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;157&quot; height=&quot;117&quot; data-origin-width=&quot;157&quot; data-origin-height=&quot;117&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;y가 sin 곡선에서 노이즈를 추가한 것이기에 linear나 다항식의 형태로는 좋은 성과를 거두지 못하고,&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;RBF(가우시안)의 형태가 가장 성능이 좋은 모습을 보였다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 14일&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘의 소감: &lt;/span&gt;&lt;/b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;곧 모델의 1황 랜덤 포레스트가 온다... 오늘은 그를 위한 기틀을 닦는 날 &lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  Ensemble&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;hotel datasets 활용&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778719765297&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import f1_score

hotel = pd.read_csv('../data/hotel_bookings.csv')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 전처리&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;이상치&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778721350071&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;hotel.info()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;503&quot; data-origin-height=&quot;351&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/JPfe9/dJMcagerGRv/0X93vTXpTbLhnVWhdKCNZ1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/JPfe9/dJMcagerGRv/0X93vTXpTbLhnVWhdKCNZ1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/JPfe9/dJMcagerGRv/0X93vTXpTbLhnVWhdKCNZ1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FJPfe9%2FdJMcagerGRv%2F0X93vTXpTbLhnVWhdKCNZ1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;503&quot; height=&quot;351&quot; data-origin-width=&quot;503&quot; data-origin-height=&quot;351&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;pre id=&quot;code_1778721375636&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;hotel.describe()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1518&quot; data-origin-height=&quot;255&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/MeXJ8/dJMcafs3bxG/Ndq2hUvsPLgmqmFhkowx4K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/MeXJ8/dJMcafs3bxG/Ndq2hUvsPLgmqmFhkowx4K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/MeXJ8/dJMcafs3bxG/Ndq2hUvsPLgmqmFhkowx4K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FMeXJ8%2FdJMcafs3bxG%2FNdq2hUvsPLgmqmFhkowx4K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1518&quot; height=&quot;255&quot; data-origin-width=&quot;1518&quot; data-origin-height=&quot;255&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;adr column은 투숙료를 의미하는데, 여기서 0보다 작은 값이 보인다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778721460296&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;flag = hotel['adr'] &amp;lt; 0
hotel.loc[flag,]&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1507&quot; data-origin-height=&quot;66&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/mbibj/dJMb990IwFT/uYtttd2ZAaf8uPZeGgZjvk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/mbibj/dJMb990IwFT/uYtttd2ZAaf8uPZeGgZjvk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/mbibj/dJMb990IwFT/uYtttd2ZAaf8uPZeGgZjvk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fmbibj%2FdJMb990IwFT%2FuYtttd2ZAaf8uPZeGgZjvk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1507&quot; height=&quot;66&quot; data-origin-width=&quot;1507&quot; data-origin-height=&quot;66&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;행이 하나밖에 없고, 비상식적인 수치이기에 제거한다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1778721686929&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;hotel = hotel.loc[~flag,]
hotel.describe()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1508&quot; data-origin-height=&quot;256&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/tViOu/dJMcafGChdg/HiIdywT5roqcEK2iJ1VlR1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/tViOu/dJMcafGChdg/HiIdywT5roqcEK2iJ1VlR1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/tViOu/dJMcafGChdg/HiIdywT5roqcEK2iJ1VlR1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FtViOu%2FdJMcafGChdg%2FHiIdywT5roqcEK2iJ1VlR1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1508&quot; height=&quot;256&quot; data-origin-width=&quot;1508&quot; data-origin-height=&quot;256&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;min 값이 0원이지만 공짜로 투숙하는 경우가 있을 수 있으니 해당 부분은 넘어간다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;결측치&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;lead_time과 adr column의 결측치들을 평균값으로 대체&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778721952050&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;hotel['lead_time'] = hotel['lead_time'].fillna( hotel['lead_time'].mean() )
hotel['adr'] = hotel['adr'].fillna( hotel['adr'].mean() )&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;해당 부분은 mean()이 기본적으로 결측치(NaN)를 제외하고(skipna=True) 평균을 계산해주는 pandas의 특성 덕에 가능하다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;0과 1로 이루어진 is_repeated_guest column의 결측치들은 최빈값으로 대체&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778722129095&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;hotel['is_repeated_guest'] = hotel['is_repeated_guest'].fillna(
    hotel['is_repeated_guest'].mode()[0]
)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;범주형 변수&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;범주형 변수인 deposit_type을 dummy 변수로 변환한다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778722607322&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;hotel['deposit_type'].unique()
# ['No Deposit', 'Refundable', 'Non Refund']

hotel['deposit_type'].value_counts()
# deposit_type
# No Deposit    19137
# Non Refund      834
# Refundable       28

df = pd.get_dummies(hotel, columns=['deposit_type'], drop_first = True)
df.info()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;496&quot; data-origin-height=&quot;370&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/pgrkp/dJMcaffzrv1/u4KyuG5Ye8ILnkJ8BySsKK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/pgrkp/dJMcaffzrv1/u4KyuG5Ye8ILnkJ8BySsKK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/pgrkp/dJMcaffzrv1/u4KyuG5Ye8ILnkJ8BySsKK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fpgrkp%2FdJMcaffzrv1%2Fu4KyuG5Ye8ILnkJ8BySsKK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;365&quot; height=&quot;272&quot; data-origin-width=&quot;496&quot; data-origin-height=&quot;370&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;class의 불균형 정도 확인&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778722749277&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df['is_canceled'].value_counts()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;216&quot; data-origin-height=&quot;83&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/xNPjV/dJMcaf0RVUM/1U0w01d1ucO7zwEDHUcXdk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/xNPjV/dJMcaf0RVUM/1U0w01d1ucO7zwEDHUcXdk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/xNPjV/dJMcaf0RVUM/1U0w01d1ucO7zwEDHUcXdk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FxNPjV%2FdJMcaf0RVUM%2F1U0w01d1ucO7zwEDHUcXdk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;216&quot; height=&quot;83&quot; data-origin-width=&quot;216&quot; data-origin-height=&quot;83&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;불균형이 심하긴 하지만 우선 해당 비율을 유지한 채로 분석을 시행해보겠다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 분할, 모델 생성/학습/예측/평가&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;데이터 분할&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778722941468&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 독립/종속 분할
X = df.drop('is_canceled', axis = 1)
y = df['is_canceled']

# train/test 분할
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 42, stratify = y
)

y_train.value_counts()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;217&quot; data-origin-height=&quot;83&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/nfMQy/dJMcab5gjRL/koselRhm3C47uDAs7BLUR0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/nfMQy/dJMcab5gjRL/koselRhm3C47uDAs7BLUR0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/nfMQy/dJMcab5gjRL/koselRhm3C47uDAs7BLUR0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FnfMQy%2FdJMcab5gjRL%2FkoselRhm3C47uDAs7BLUR0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;217&quot; height=&quot;83&quot; data-origin-width=&quot;217&quot; data-origin-height=&quot;83&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;모델 생성&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;배깅 모델을 생성할 때는 기본 모델이 필요한데, 이를 위한 DecisionTree를 먼저 생성한다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이때, class 데이터가 불균형하므로 class_weight parameter를 사용해 해당 부분을 커버해준다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778723035897&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;base_model = DecisionTreeClassifier(class_weight = 'balanced')
model = BaggingClassifier(base_model)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;모델 학습, 예측, 평가&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778723319749&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;model.fit(X_train, y_train)
pred = model.predict(X_test)

print( round( f1_score(y_test, pred), 4 ) )
	# 0.5845&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;생각보다 성능이 좋지 않아, DecisionTreeClassifier의 max_depth 값을 조정해본다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;모델 생성~평가 - 수정&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778723390294&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;base_model = DecisionTreeClassifier(class_weight = 'balanced', max_depth=3)
model = BaggingClassifier(base_model)

model.fit(X_train, y_train)
pred = model.predict(X_test)

print( round( f1_score(y_test, pred), 4 ) )
	# 0.5735&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오히려 성능이 더 떨어지는 모습을 보여, class의 불균형을 해소하는 방안을 시도해본다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 전처리: 샘플링&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;RandomOverSampler나 SMOTE만을 그냥 사용하는 것이 아닌,&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;RandomOverSampler를 활용한 후, Bootstrap을 활용하는 방법을 사용해본다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;1.&amp;nbsp;RandomOverSampling&amp;nbsp;/&amp;nbsp;2:1로&amp;nbsp;resampling &lt;br /&gt;2.&amp;nbsp;Bagging&amp;nbsp;model의&amp;nbsp;bootstrap&amp;nbsp;표본&amp;nbsp;비율(max_samples)을&amp;nbsp;0.8로&amp;nbsp;지정하고&amp;nbsp;n_estimators를&amp;nbsp;100으로&amp;nbsp;설정 &lt;br /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;-&amp;nbsp;DecisionTreeClassifier의&amp;nbsp;class_weight는&amp;nbsp;default&amp;nbsp;값으로,&amp;nbsp;max_depth는&amp;nbsp;3으로&amp;nbsp;설정&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778725091493&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from imblearn.over_sampling import RandomOverSampler

ros = RandomOverSampler(sampling_strategy=0.5)
over_X, over_y = ros.fit_resample(X, y)

over_y.value_counts()
# is_canceled
# 0    17599
# 1     8799
# Name: count, dtype: int64

base_model = DecisionTreeClassifier(max_depth = 3)
model = BaggingClassifier(max_samples = 0.8, n_estimators = 100)

X_train, X_test, y_train, y_test = train_test_split(
    over_X, over_y, test_size = 0.2, random_state = 42, stratify = over_y
)

model.fit(X_train, y_train)
pred = model.predict(X_test)

print( round( f1_score(y_test, pred), 4 ) )
# 0.9323&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;성능이 눈에 띄게 좋아진 것을 확인할 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;근데 여기서 내가 잘못한 부분이 무엇일까?&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;정답은 바로&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778726554784&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;base_model = DecisionTreeClassifier(max_depth = 3)
model = BaggingClassifier(max_samples = 0.8, n_estimators = 100)&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이 부분&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;가만 보면 base_model을 사용하지 않았다.&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리51.jpeg&quot; data-origin-width=&quot;222&quot; data-origin-height=&quot;227&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/YLXKK/dJMcag6zEVM/7hoqdoKQeD6WhEWUz3Cir1/img.jpg&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/YLXKK/dJMcag6zEVM/7hoqdoKQeD6WhEWUz3Cir1/img.jpg&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/YLXKK/dJMcag6zEVM/7hoqdoKQeD6WhEWUz3Cir1/img.jpg&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FYLXKK%2FdJMcag6zEVM%2F7hoqdoKQeD6WhEWUz3Cir1%2Fimg.jpg&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;161&quot; height=&quot;165&quot; data-filename=&quot;티스토리51.jpeg&quot; data-origin-width=&quot;222&quot; data-origin-height=&quot;227&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;근데 웃긴 게 &lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;base_model 사용하면 성능이 더 떨어진다. f1 score 0.54 정도로...&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리52.png&quot; data-origin-width=&quot;500&quot; data-origin-height=&quot;400&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/p0JsR/dJMcaipM80J/UjkIxFCoASqJk4kRoxDLI1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/p0JsR/dJMcaipM80J/UjkIxFCoASqJk4kRoxDLI1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/p0JsR/dJMcaipM80J/UjkIxFCoASqJk4kRoxDLI1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fp0JsR%2FdJMcaipM80J%2FUjkIxFCoASqJk4kRoxDLI1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;271&quot; height=&quot;217&quot; data-filename=&quot;티스토리52.png&quot; data-origin-width=&quot;500&quot; data-origin-height=&quot;400&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #9d9d9d;&quot;&gt;아니고 그냥 얻어걸린 거임&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;강사님은 max_depth를 사용하지 않아도 n_estimators가 과적합을 방지해주는 것이라 보셨다.&lt;/span&gt;&lt;/p&gt;
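&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이후로도 max_depth를 사용하지 않은 bagging의 결과가 좋았다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;다만 위 코드는 train/test 분할 전에 전체 데이터를 오버샘플링했기 때문에, 복제된 소수 class 행이 train과 test 양쪽에 들어가 f1 score가 실제보다 높게 나왔을 수 있다. 오버샘플링은 분할 후 train 데이터에만 적용하는 것이 안전하다.&lt;/span&gt;&lt;/p&gt;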
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  Boosting&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;AdaBoost : 분류&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778746578493&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report

body = pd.read_csv('../data/bodyPerformance.csv')
body.head(3)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;847&quot; data-origin-height=&quot;158&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bAFRUd/dJMcaayycED/n6IU4r50eSZ4aPwC0L5KY0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bAFRUd/dJMcaayycED/n6IU4r50eSZ4aPwC0L5KY0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bAFRUd/dJMcaayycED/n6IU4r50eSZ4aPwC0L5KY0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbAFRUd%2FdJMcaayycED%2Fn6IU4r50eSZ4aPwC0L5KY0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;600&quot; height=&quot;158&quot; data-origin-width=&quot;847&quot; data-origin-height=&quot;158&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 전처리&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778746700125&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# gender column의 M &amp;rarr; 0, F &amp;rarr; 1
body['gender'] = body['gender'].map({'M': 0, 'F': 1})

# class 변환
body['class'] = body['class'].map({'A': 1, 'B': 2, 'C': 3, 'D': 4})&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 분할&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778746854226&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = body.drop('class', axis=1)
y = body['class']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 42
)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;모델 생성, 학습, 예측, 평가&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778746884483&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;clf = AdaBoostClassifier()
clf.fit(X_train, y_train)
pred = clf.predict(X_test)
print(classification_report(y_test, pred))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;428&quot; data-origin-height=&quot;214&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/d5Mgft/dJMcahRVKXw/wT4DeSpmK3ITKsno5w1Hv1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/d5Mgft/dJMcahRVKXw/wT4DeSpmK3ITKsno5w1Hv1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/d5Mgft/dJMcahRVKXw/wT4DeSpmK3ITKsno5w1Hv1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fd5Mgft%2FdJMcahRVKXw%2FwT4DeSpmK3ITKsno5w1Hv1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;270&quot; height=&quot;135&quot; data-origin-width=&quot;428&quot; data-origin-height=&quot;214&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;평가가 한 줄로 끝나다니... 신세계였다&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;파라미터 수정&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;성능을 더 끌어올려보려는 시도로 파라미터를 조정해본다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778747050323&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;clf2 = AdaBoostClassifier(n_estimators=500, learning_rate=0.1)
clf2.fit(X_train, y_train)
pred2 = clf2.predict(X_test)
print(classification_report(y_test, pred2))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;430&quot; data-origin-height=&quot;205&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bDzpaj/dJMcadWh6T2/KbCvob70Au8dPsV6lYkxBK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bDzpaj/dJMcadWh6T2/KbCvob70Au8dPsV6lYkxBK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bDzpaj/dJMcadWh6T2/KbCvob70Au8dPsV6lYkxBK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbDzpaj%2FdJMcadWh6T2%2FKbCvob70Au8dPsV6lYkxBK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;294&quot; height=&quot;140&quot; data-origin-width=&quot;430&quot; data-origin-height=&quot;205&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;큰 차이가 없다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;estimator-max_depth 수정&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;AdaBoost의 기본 estimator인 Decision Tree(max_depth=1)가 너무 단순한 형태라 생긴 문제일 수도 있기에, estimator의 max_depth 값을 조정해보기로 했다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778747182849&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sklearn.tree import DecisionTreeClassifier

clf3 = AdaBoostClassifier(
    estimator = DecisionTreeClassifier(max_depth = 4),
    n_estimators = 500,
    learning_rate = 0.1
)

clf3.fit(X_train, y_train)
pred3 = clf3.predict(X_test)
print(classification_report(y_test, pred3))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;434&quot; data-origin-height=&quot;212&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bsisfV/dJMcahLctgc/kEv158jYOKDVNpwVrnuRyk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bsisfV/dJMcahLctgc/kEv158jYOKDVNpwVrnuRyk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bsisfV/dJMcahLctgc/kEv158jYOKDVNpwVrnuRyk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbsisfV%2FdJMcahLctgc%2FkEv158jYOKDVNpwVrnuRyk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;301&quot; height=&quot;147&quot; data-origin-width=&quot;434&quot; data-origin-height=&quot;212&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;성능이 많이 좋아졌다!&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;AdaBoost : 회귀&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778747721050&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_absolute_error, r2_score
car = pd.read_csv('../data/CarPrice_Assignment.csv')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;데이터 전처리: 숫자 column만 필터링&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778747760944&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df = car.select_dtypes('number')
df.drop('car_ID', axis=1, inplace = True)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;데이터 분할&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778747937180&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = df.drop('price', axis = 1)
y = df['price']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.3, random_state = 42
)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Serif KR';&quot;&gt;모델 생성, 학습, 예측, 평가&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778747990928&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;reg = AdaBoostRegressor()
reg.fit(X_train, y_train)
pred = reg.predict(X_test)
print('MAE: ', round(mean_absolute_error(y_test, pred), 2), '/ R&amp;sup2;: ', round(r2_score(y_test, pred)*100, 2))
# MAE:  2007.03 / R&amp;sup2;:  90.53&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;오차 시각화&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778748077369&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 각 단계별 예측값을 이용하여 MAE를 구하고 그래프로 시각화
list(reg.staged_predict(X_test))

mae_list = []
for stage_pred in list(reg.staged_predict(X_test)):
    mae = mean_absolute_error(y_test, stage_pred)
    mae_list.append(mae)

# 시각화
plt.figure(figsize=(30, 10))
plt.plot(mae_list)
plt.xlabel('Count')
plt.ylabel('MAE')
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리53.png&quot; data-origin-width=&quot;2409&quot; data-origin-height=&quot;833&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/kRF23/dJMcac39YRF/IkRdaMKfdsgbB2jOQyRr3k/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/kRF23/dJMcac39YRF/IkRdaMKfdsgbB2jOQyRr3k/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/kRF23/dJMcac39YRF/IkRdaMKfdsgbB2jOQyRr3k/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FkRF23%2FdJMcac39YRF%2FIkRdaMKfdsgbB2jOQyRr3k%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2409&quot; height=&quot;833&quot; data-filename=&quot;티스토리53.png&quot; data-origin-width=&quot;2409&quot; data-origin-height=&quot;833&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;변수 중요도 시각화&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778748354754&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;importance = reg.feature_importances_
cols = X.columns

importance_df = pd.DataFrame(
    zip(cols, importance),
    columns = ['feature_name', 'importance']
)

importance_df.sort_values('importance', ascending = False, inplace=True)
importance_df.reset_index(drop = True, inplace = True)
importance_df&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;262&quot; data-origin-height=&quot;419&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/mvYzv/dJMcafT9V74/ml1Zta0QBw6o93cv7WkTo0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/mvYzv/dJMcafT9V74/ml1Zta0QBw6o93cv7WkTo0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/mvYzv/dJMcafT9V74/ml1Zta0QBw6o93cv7WkTo0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FmvYzv%2FdJMcafT9V74%2Fml1Zta0QBw6o93cv7WkTo0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;126&quot; height=&quot;202&quot; data-origin-width=&quot;262&quot; data-origin-height=&quot;419&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1778748383909&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.figure(figsize = (25, 10))
plt.bar(importance_df['feature_name'], importance_df['importance'])
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리54.png&quot; data-origin-width=&quot;1989&quot; data-origin-height=&quot;813&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ug0qW/dJMcadBVML3/Xpf04JU3Dphx0p63VyCmk0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ug0qW/dJMcadBVML3/Xpf04JU3Dphx0p63VyCmk0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ug0qW/dJMcadBVML3/Xpf04JU3Dphx0p63VyCmk0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fug0qW%2FdJMcadBVML3%2FXpf04JU3Dphx0p63VyCmk0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1989&quot; height=&quot;813&quot; data-filename=&quot;티스토리54.png&quot; data-origin-width=&quot;1989&quot; data-origin-height=&quot;813&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 15일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘의 소감:&lt;/span&gt;&lt;/b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;span&gt; 랜포는 신이다&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  RandomForest&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;body dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리, 데이터셋 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778805614464&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

body = pd.read_csv('../data/bodyPerformance.csv')
body.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;846&quot; data-origin-height=&quot;214&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/kH52D/dJMcafGDiwh/wOMKXIvfjqP4wKLbfXhkT1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/kH52D/dJMcafGDiwh/wOMKXIvfjqP4wKLbfXhkT1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/kH52D/dJMcafGDiwh/wOMKXIvfjqP4wKLbfXhkT1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FkH52D%2FdJMcafGDiwh%2FwOMKXIvfjqP4wKLbfXhkT1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;581&quot; height=&quot;214&quot; data-origin-width=&quot;846&quot; data-origin-height=&quot;214&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 전처리: LabelEncoder&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;저번과 마찬가지로 gender column과 class column을 숫자형 데이터로 변환해줄 것인데,&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이번엔 LabelEncoder를 사용해보기로 하였다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778805869303&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# LabelEncoder &amp;rarr; 문자형 데이터들을 숫자형으로 변환
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
body['class_1'] = labelencoder.fit_transform(body['class'])
body['gender'] = labelencoder.fit_transform(body['gender'])
body.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1117&quot; data-origin-height=&quot;175&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bCuOgO/dJMcacC9gCs/qAkWHmUpKYrRqu5sPIu9h1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bCuOgO/dJMcacC9gCs/qAkWHmUpKYrRqu5sPIu9h1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bCuOgO/dJMcacC9gCs/qAkWHmUpKYrRqu5sPIu9h1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbCuOgO%2FdJMcacC9gCs%2FqAkWHmUpKYrRqu5sPIu9h1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1117&quot; height=&quot;175&quot; data-origin-width=&quot;1117&quot; data-origin-height=&quot;175&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;범주형 데이터를 숫자로 변환하면 모델이 범주 간의 크기·순서 관계를 찾으려 할 수 있으므로,&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;독립적인 범주형 데이터에서는 사용하지 않는 것이 좋다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;sklearn.preprocessing에 속해 있기에 fit, transform, fit_transform 등 기본 사용법은 scaler와 동일하다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1778806603069&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = body.drop(['class', 'class_1'], axis=1)
y = body['class_1']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.3, random_state = 42, stratify = y
)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;RandomForestClassifier 사용&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778806662069&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;clf = RandomForestClassifier()
clf.fit(X_train, y_train)
pred = clf.predict(X_test)

print(classification_report(y_test, pred))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;425&quot; data-origin-height=&quot;209&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bC7dpK/dJMcacC9gC8/FbPqOb6zRsvgatIuOnwunK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bC7dpK/dJMcacC9gC8/FbPqOb6zRsvgatIuOnwunK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bC7dpK/dJMcacC9gC8/FbPqOb6zRsvgatIuOnwunK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbC7dpK%2FdJMcacC9gC8%2FFbPqOb6zRsvgatIuOnwunK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;309&quot; height=&quot;152&quot; data-origin-width=&quot;425&quot; data-origin-height=&quot;209&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;parameter 변경&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;1.&amp;nbsp;모델의&amp;nbsp;개수를&amp;nbsp;증가 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;2.&amp;nbsp;각&amp;nbsp;트리에서&amp;nbsp;사용되는&amp;nbsp;샘플을&amp;nbsp;70%로&amp;nbsp;제한 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;3.&amp;nbsp;`min_samples_leaf`를&amp;nbsp;기본값&amp;nbsp;1에서&amp;nbsp;4로&amp;nbsp;변경&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778806925029&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;clf2 = RandomForestClassifier(
    n_estimators = 200,
    max_samples = 1.0,
    min_samples_leaf = 1,
    max_features = 6
)

clf2.fit(X_train, y_train)
pred2 = clf2.predict(X_test)

print(classification_report(y_test, pred2))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;444&quot; data-origin-height=&quot;211&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/RxvIN/dJMcad22G0n/OzeizuC1Uixk6SrE4nrl0k/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/RxvIN/dJMcad22G0n/OzeizuC1Uixk6SrE4nrl0k/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/RxvIN/dJMcad22G0n/OzeizuC1Uixk6SrE4nrl0k/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FRxvIN%2FdJMcad22G0n%2FOzeizuC1Uixk6SrE4nrl0k%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;444&quot; height=&quot;211&quot; data-origin-width=&quot;444&quot; data-origin-height=&quot;211&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;값이 미세하게 증가했다.&lt;/span&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  Pipeline&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;iris dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778832985995&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

iris = pd.read_csv('../csv/iris.csv')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 전처리, 분할&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778833034388&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;le = LabelEncoder()
iris['species'] = le.fit_transform(iris['species'])
iris['species'].value_counts()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;228&quot; data-origin-height=&quot;100&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bS83f1/dJMcajoDiAz/tTHHJsotWdwLErKetv6slk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bS83f1/dJMcajoDiAz/tTHHJsotWdwLErKetv6slk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bS83f1/dJMcajoDiAz/tTHHJsotWdwLErKetv6slk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbS83f1%2FdJMcajoDiAz%2FtTHHJsotWdwLErKetv6slk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;228&quot; height=&quot;100&quot; data-origin-width=&quot;228&quot; data-origin-height=&quot;100&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1778833070798&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = iris.drop('species', axis = 1)
y = iris['species']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 42, stratify = y
)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;파이프 라인&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778833146736&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;pipe = Pipeline(
    [
        ( 'scaler', StandardScaler() ),     # step 1
        ( 'model', SVC() )                  # step 2

    ], verbose = True
)

pipe.fit(X_train, y_train)
pred = pipe.predict(X_test)
print(classification_report(y_test, pred))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;435&quot; data-origin-height=&quot;193&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/eKGafi/dJMcadhKgYW/uayJUwzFdmwYgQi63H6aJ1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/eKGafi/dJMcadhKgYW/uayJUwzFdmwYgQi63H6aJ1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/eKGafi/dJMcadhKgYW/uayJUwzFdmwYgQi63H6aJ1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FeKGafi%2FdJMcadhKgYW%2FuayJUwzFdmwYgQi63H6aJ1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;338&quot; height=&quot;150&quot; data-origin-width=&quot;435&quot; data-origin-height=&quot;193&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  GridSearchCV&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그대로 iris dataset 활용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;Pipeline이 여러 단계를 연속적으로 실행하는 자동화 도구였다면,&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;GridSearch는 이 Pipeline을 활용해서 최적의 파라미터를 찾아준다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778833655856&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sklearn.model_selection import GridSearchCV&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;자세한 파라미터 설명은 K-fold를 결합한 부분에서 언급하겠다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778833643827&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# SVC 모델 생성

svc = SVC()

# svc에서 사용할 매개변수의 목록들을 작성
grid_dict = {
    'C' : [ 0.1, 1, 10 ],
    'kernel' : [ 'linear', 'rbf' ],
    'gamma' : [ 'scale', 'auto' ]
}

grid = GridSearchCV(
    estimator = svc,
    param_grid = grid_dict,
    cv = 5,
    scoring = 'accuracy',
    verbose = 1,
)


grid_dict&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;610&quot; data-origin-height=&quot;31&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/KadGr/dJMb99TVNd3/zYqEkmkymNEeEstz1nREVK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/KadGr/dJMb99TVNd3/zYqEkmkymNEeEstz1nREVK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/KadGr/dJMb99TVNd3/zYqEkmkymNEeEstz1nREVK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FKadGr%2FdJMb99TVNd3%2FzYqEkmkymNEeEstz1nREVK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;610&quot; height=&quot;31&quot; data-origin-width=&quot;610&quot; data-origin-height=&quot;31&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1778833850821&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;grid.fit(X_train, y_train)


print('최적의 파라미터 조합: ', grid.best_params_)
print('최적의 점수: ', grid.best_score_)
print('최적의 모델: ', grid.best_estimator_)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;553&quot; data-origin-height=&quot;62&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/WLsK0/dJMcaffAYec/B72gA9Q0UOiMbK688lKq61/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/WLsK0/dJMcaffAYec/B72gA9Q0UOiMbK688lKq61/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/WLsK0/dJMcaffAYec/B72gA9Q0UOiMbK688lKq61/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FWLsK0%2FdJMcaffAYec%2FB72gA9Q0UOiMbK688lKq61%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;419&quot; height=&quot;47&quot; data-origin-width=&quot;553&quot; data-origin-height=&quot;62&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  K-Fold&lt;/span&gt;&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;데이터셋을 K개의 동일 크기로 나눠서 K번 반복&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이때, 하나의 폴드를 검증용으로 사용, K-1개의 폴드를 학습용으로 사용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778834399597&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import numpy as np
from sklearn.model_selection import KFold, StratifiedKFold&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;K-Fold logic&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778834462962&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = np.array(
    ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
)

# KFold
kfold = KFold(n_splits = 5)

list(kfold.split(X))

for train_idx, test_idx in kfold.split(X):
    print('-' * 60)
    print(&quot;학습 데이터의 목록: &quot;, X[train_idx])
    print(&quot;검증 데이터의 목록: &quot;, X[test_idx])&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;501&quot; data-origin-height=&quot;298&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bW5Lzx/dJMcahxGx3j/72hKmlJfVtIrpwo60YgOTK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bW5Lzx/dJMcahxGx3j/72hKmlJfVtIrpwo60YgOTK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bW5Lzx/dJMcahxGx3j/72hKmlJfVtIrpwo60YgOTK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbW5Lzx%2FdJMcahxGx3j%2F72hKmlJfVtIrpwo60YgOTK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;333&quot; height=&quot;198&quot; data-origin-width=&quot;501&quot; data-origin-height=&quot;298&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;k-fold는 해당 로직으로 굴러간다는 것을 볼 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;Pipeline + GridSearchCV + K-Fold&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;1.&amp;nbsp;boston&amp;nbsp;데이터셋&amp;nbsp;로드 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;2.&amp;nbsp;독립변수,&amp;nbsp;종속변수로&amp;nbsp;데이터&amp;nbsp;분할 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;3.&amp;nbsp;train,&amp;nbsp;test&amp;nbsp;8:2 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;4. &lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;K-Fold&lt;/b&gt; &lt;/span&gt;이용하여 10개의 폴드 생성 (shuffle=True) &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5. &lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;Pipeline&lt;/b&gt;&lt;/span&gt; 생성 (StandardScaler(), SVR()) &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;6.&amp;nbsp;파라미터&amp;nbsp;조합&amp;nbsp;생성&amp;nbsp;(svr&amp;nbsp;모델에서&amp;nbsp;C(1,&amp;nbsp;10,&amp;nbsp;100),&amp;nbsp;kernel('linear',&amp;nbsp;'rbf'),&amp;nbsp;epsilon(0.1,&amp;nbsp;0.2,&amp;nbsp;0.5)) &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;7.&amp;nbsp;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;GridSearchCV&lt;/b&gt;&lt;/span&gt;&amp;nbsp;베스트&amp;nbsp;모델&amp;nbsp;선정의&amp;nbsp;기준은&amp;nbsp;neg_mean_squared_error &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;8.&amp;nbsp;최고의&amp;nbsp;조합을&amp;nbsp;찾은&amp;nbsp;뒤&amp;nbsp;R2&amp;nbsp;score를&amp;nbsp;이용하여&amp;nbsp;성능을&amp;nbsp;확인 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;9.&amp;nbsp;최적의&amp;nbsp;파라미터를&amp;nbsp;이용하여&amp;nbsp;새로운&amp;nbsp;모델을&amp;nbsp;생성하고&amp;nbsp;성능&amp;nbsp;R2&amp;nbsp;score&amp;nbsp;확인&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778834646739&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 1)
boston = pd.read_csv('../csv/boston.csv')

# 2)
X = boston.drop('Price', axis = 1)
y = boston['Price']

# 3)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1778834767275&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 4)
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

# 5)
pipe = Pipeline(
    [
        ('scaler', StandardScaler()),
        ('reg', SVR())
    ]
)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1778834799349&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 6) ~ # 7)
params = {
    'reg__C' : [1, 10, 100],
    'reg__epsilon' : [0.1, 0.2, 0.5],
    'reg__kernel' : ['linear', 'rbf']
}

grid_reg = GridSearchCV(
    estimator= pipe,               		# 파이프라인으로 생성한 모델을 지정
    param_grid= params,             		# dict 형태로 파라미터의 조합
    scoring = 'neg_mean_squared_error',         # 베스트 모델 선정 기준
    cv = kfold,               		   	# 생성해둔 KFold 지정
    refit = True,               		# 재학습 여부
    n_jobs = -1,                 	 	# 모든 CPU를 사용
    return_train_score= True,    		# 학습 데이터의 성능을 출력
    verbose = 2                    		# 로그 표시를 상세하게
)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1778835015851&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 8)
grid_reg.fit(X_train, y_train)
grid_reg.score(X_test, y_test)		# -12.063990309898506
print(grid_reg.best_estimator_)
	# Pipeline(steps=[('scaler', StandardScaler()), ('reg', SVR(C=100, epsilon=0.5))])

# 9)
pred = grid_reg.predict(X_test)
from sklearn.metrics import r2_score
print(round(r2_score(y_test, pred), 4))		# 0.8355&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;color: #fe6b00; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;6주차 소감&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;모델과 함께 몰아쳤던 6주차&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;만약 내가 기초 지식이 없었다면 견뎌낼 수 있었을까...? &lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;ADsP 준비할 때의 기억이 새록새록했던 한 주였다. 그 땐 이론만으로도 머리 싸맸었는데...&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;빅분기 대비가 제대로 된 것 같아 뿌듯하다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;color: #dddddd;&quot;&gt;&lt;i&gt;&lt;s&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이제 더이상 sklearn의 서브 모듈 기억 못해서 로지스틱 못 불러오는 바보같은 짓은 안 하겠지&lt;/span&gt;&lt;/s&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;다음 주는 비지도 학습에 대해 배운 뒤 딥러닝으로 넘어갈 시간이다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;사실 딥러닝보다는 자연어 처리가 더 무섭긴 하지만...ㅋㅋㅋ 다음 주도 성실하게 참여하자!&lt;/span&gt;&lt;/p&gt;</description>
      <category>멀티캠퍼스부트캠프</category>
      <category>데이터 분석가 부트캠프</category>
      <category>멀티캠퍼스부트캠프</category>
      <category>부트캠프</category>
      <author>가라어퍼</author>
      <guid isPermaLink="true">https://bbgw-oshoulder.tistory.com/6</guid>
      <comments>https://bbgw-oshoulder.tistory.com/6#entry6comment</comments>
      <pubDate>Sat, 16 May 2026 18:10:46 +0900</pubDate>
    </item>
    <item>
      <title>5주차 Note: 머신러닝</title>
      <link>https://bbgw-oshoulder.tistory.com/5</link>
      <description>&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;멀캠5.jpg&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/byVTyl/dJMcadaOvB3/KlgqVBgKNctiIpM8JkL3Hk/img.jpg&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/byVTyl/dJMcadaOvB3/KlgqVBgKNctiIpM8JkL3Hk/img.jpg&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/byVTyl/dJMcadaOvB3/KlgqVBgKNctiIpM8JkL3Hk/img.jpg&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbyVTyl%2FdJMcadaOvB3%2FKlgqVBgKNctiIpM8JkL3Hk%2Fimg.jpg&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;502&quot; height=&quot;268&quot; data-filename=&quot;멀캠5.jpg&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;5주차: 5월 4일 ~ 5월 8일&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;멀티캠퍼스&lt;/span&gt; 부트캠프 5주차 요약&lt;/b&gt;✍️&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/4 ] &lt;span style=&quot;color: #fe6b00;&quot;&gt;휴강&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/5 ] &lt;span style=&quot;color: #fe6b00;&quot;&gt;어린이날 휴강&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #000000;&quot;&gt;[ 5/6 ] &lt;b&gt;시각화&lt;/b&gt;: Looker Studio&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #000000;&quot;&gt;[ 5/7 ] &lt;b&gt;머신러닝&lt;/b&gt;: Outlier, dummies, 데이터 불균형, 데이터 Split&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #000000;&quot;&gt;[ 5/8 ] &lt;b&gt;머신러닝&lt;/b&gt;: 데이터 Scaling, Regression, Classification&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 6일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의&lt;/b&gt;&amp;nbsp;&lt;b&gt;소감&lt;/b&gt;: Looker는 Tableau와 PowerPoint를 결합한 느낌? UI는 Tableau가 더 예쁘긴 한데, 편의성은 Looker가 조금 더 좋은 듯하다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;둘 모두 시각화 툴이니만큼 크게 차이점이라 할 만한 점은 보이지 않았으니, 두 툴을 함께 공부해도 좋겠다는 생각이 들었다.&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;수업 시간에 실습한 내용&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;매출보고서.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1440&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bP7pzc/dJMcagZHTIy/0EtUJHKtE9RWIKS70k7SKk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bP7pzc/dJMcagZHTIy/0EtUJHKtE9RWIKS70k7SKk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bP7pzc/dJMcagZHTIy/0EtUJHKtE9RWIKS70k7SKk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbP7pzc%2FdJMcagZHTIy%2F0EtUJHKtE9RWIKS70k7SKk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1920&quot; height=&quot;1440&quot; data-filename=&quot;매출보고서.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1440&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;GA_Demo_AARRR.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;7498&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/et90iK/dJMcagZHTJU/RsdhsaECjCn0KMdWYudEg0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/et90iK/dJMcagZHTJU/RsdhsaECjCn0KMdWYudEg0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/et90iK/dJMcagZHTJU/RsdhsaECjCn0KMdWYudEg0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fet90iK%2FdJMcagZHTJU%2FRsdhsaECjCn0KMdWYudEg0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1920&quot; height=&quot;7498&quot; data-filename=&quot;GA_Demo_AARRR.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;7498&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Tableau는 피벗테이블의 느낌이었다면, Looker는 PPT 만드는 느낌이었달까?&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;두 툴들의 장점이 명확해서 자유자재로 활용할 수 있다면 정말 나의 분석에 날개를 달아줄 듯한 느낌이 들었다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 7일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘의&lt;/span&gt;&lt;/b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&amp;nbsp;&lt;b&gt;소감&lt;/b&gt;: 빅분기에서 사용할 수 있는 것들을 많이 배운 날! 시각화가 일찍 끝난 것은 아쉽지만...&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;학교에서 배웠던 머신러닝을 단단히 다진다 생각하고 열심히 하자!&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt; &amp;nbsp;&lt;b&gt;이상치&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;wine dataset을 활용한 이상치 처리&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778142570219&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine

wine_data = load_wine()
wine = pd.DataFrame(
    data = wine_data['data'],
    columns = wine_data['feature_names']
)
wine['class'] = wine_data['target']

wine&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1408&quot; data-origin-height=&quot;362&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/NTHRY/dJMcafzLr6A/gBInBadWvbkT1aXQ6jckOK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/NTHRY/dJMcafzLr6A/gBInBadWvbkT1aXQ6jckOK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/NTHRY/dJMcafzLr6A/gBInBadWvbkT1aXQ6jckOK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FNTHRY%2FdJMcafzLr6A%2FgBInBadWvbkT1aXQ6jckOK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1408&quot; height=&quot;362&quot; data-origin-width=&quot;1408&quot; data-origin-height=&quot;362&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;이상치 데이터를 확인&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778142449222&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;wine.describe()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1616&quot; data-origin-height=&quot;257&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bsCM34/dJMcaiwrPYJ/XIwzkWZaDIKl2Ydj5ka8F1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bsCM34/dJMcaiwrPYJ/XIwzkWZaDIKl2Ydj5ka8F1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bsCM34/dJMcaiwrPYJ/XIwzkWZaDIKl2Ydj5ka8F1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbsCM34%2FdJMcaiwrPYJ%2FXIwzkWZaDIKl2Ydj5ka8F1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1616&quot; height=&quot;257&quot; data-origin-width=&quot;1616&quot; data-origin-height=&quot;257&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;boxplot()&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778142995723&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.boxplot(wine['color_intensity'])
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리37.png&quot; data-origin-width=&quot;543&quot; data-origin-height=&quot;413&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bmAkOe/dJMcahEi2AG/DhOwFwYNy0IO6gTE7wsvD1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bmAkOe/dJMcahEi2AG/DhOwFwYNy0IO6gTE7wsvD1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bmAkOe/dJMcahEi2AG/DhOwFwYNy0IO6gTE7wsvD1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbmAkOe%2FdJMcahEi2AG%2FDhOwFwYNy0IO6gTE7wsvD1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;408&quot; height=&quot;310&quot; data-filename=&quot;티스토리37.png&quot; data-origin-width=&quot;543&quot; data-origin-height=&quot;413&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그래프로 보았을 때 이상치가 4개 정도 보인다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;이상치 필터링&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이상치만을 골라내기 위하여 사분위수, IQR을 계산한다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778143380804&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;q_1, q_3 = np.percentile(wine['color_intensity'], [25, 75])
print(q_1)		# 3.2199999999999998
print(q_3)		# 6.2

# IQR 계산
iqr = q_3 - q_1

# 상단 경계 계산
iqr_top = q_3 + 1.5*iqr
# 하단 경계 계산
iqr_bottom = q_1 - 1.5*iqr

print(iqr, iqr_top, iqr_bottom)
# 2.9800000000000004 10.670000000000002 -1.2500000000000009


# 위쪽 이상치 필터링
upper_flag = wine['color_intensity'] &amp;gt; iqr_top
# 아래쪽 이상치 필터링
lower_flag = wine['color_intensity'] &amp;lt; iqr_bottom

wine.loc[upper_flag | lower_flag, ]&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1415&quot; data-origin-height=&quot;147&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/27LJB/dJMb99M11K9/BYCuskcQ7HCJBADTYUwqZK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/27LJB/dJMb99M11K9/BYCuskcQ7HCJBADTYUwqZK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/27LJB/dJMb99M11K9/BYCuskcQ7HCJBADTYUwqZK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F27LJB%2FdJMb99M11K9%2FBYCuskcQ7HCJBADTYUwqZK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1415&quot; height=&quot;147&quot; data-origin-width=&quot;1415&quot; data-origin-height=&quot;147&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;이상치 제거, 대체&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1778143603728&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 극단치 제거
df = wine.copy()
out_idx = df.loc[upper_flag | lower_flag, ].index
df.drop(out_idx, axis=0)

# 극단치 대체
# 상단의 경계에서 벗어난 데이터는 상단의 경계 값으로 채워주고
# 하단의 경계에서 벗어난 데이터는 하단의 경계 값으로 채워준다.
# (여기서는 하단 경계가 음수(-1.25)라 하단 이상치가 없으므로 상단만 대체한다.)
df2 = wine.copy()
df2.loc[upper_flag, 'color_intensity'] = iqr_top

# 검증용 boxplot
plt.boxplot(df2['color_intensity'])
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리38.png&quot; data-origin-width=&quot;543&quot; data-origin-height=&quot;413&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/TpDIa/dJMcajhN8zG/zD7QgWBJAdDXSc4y8SQv7K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/TpDIa/dJMcajhN8zG/zD7QgWBJAdDXSc4y8SQv7K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/TpDIa/dJMcajhN8zG/zD7QgWBJAdDXSc4y8SQv7K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FTpDIa%2FdJMcajhN8zG%2FzD7QgWBJAdDXSc4y8SQv7K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;376&quot; height=&quot;286&quot; data-filename=&quot;티스토리38.png&quot; data-origin-width=&quot;543&quot; data-origin-height=&quot;413&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;상단 경계를 벗어난 값을 경계 값으로 대체한 결과, boxplot에서 이상치가 사라진 것을 확인할 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  범주형 데이터의 변환&lt;/span&gt;&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;더미 변수 생성&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778150064900&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# class column의 값들을 범주형으로 변경
wine['class'] = wine['class'].map(
    lambda x: wine_data['target_names'][x]
)

# 더미 변수 생성
dummie_df = pd.get_dummies(wine, columns = ['class'])
dummie_df&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1655&quot; data-origin-height=&quot;370&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ROd5k/dJMcagyDtU0/KY16HTuB63FBGLJVv6pgt0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ROd5k/dJMcagyDtU0/KY16HTuB63FBGLJVv6pgt0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ROd5k/dJMcagyDtU0/KY16HTuB63FBGLJVv6pgt0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FROd5k%2FdJMcagyDtU0%2FKY16HTuB63FBGLJVv6pgt0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1655&quot; height=&quot;370&quot; data-origin-width=&quot;1655&quot; data-origin-height=&quot;370&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  데이터의 불균형 해소&lt;/span&gt;&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;샘플링: 다수 데이터와 소수 데이터를 특정 비율로 조절해주는 기법&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;라이브러리 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778151224827&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
from sklearn.datasets import make_classification
from collections import Counter
from imblearn.under_sampling import RandomUnderSampler&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;make_classification()&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;make_classification() : 데이터가&amp;nbsp;불균형한&amp;nbsp;랜덤&amp;nbsp;데이터를&amp;nbsp;생성&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778201650846&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;x, y = make_classification(
    n_samples= 1000,
    n_features= 5,
    weights= [0.9],
    flip_y= 0
)

# Counter: 숫자 세는 기능
Counter(y)		# Counter({np.int64(0): 901, np.int64(1): 99})&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;데이터 프레임으로 엮기&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778203572471&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df = pd.DataFrame(x)
df['target'] = y

df.head()&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;언더 샘플링: RandomUnderSampler()&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;언더 샘플링&lt;/b&gt;: 다수의 라벨을 가진 데이터를 샘플링하여 소수의 데이터의 수준으로 감소시키는 방법&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778203735833&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# RandomUnderSampler class의 객체(인스턴스)를 생성
undersampler = RandomUnderSampler()

under_x, under_y = undersampler.fit_resample(x, y)

Counter(under_y)		# Counter({np.int64(0): 99, np.int64(1): 99})&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;undersampler에서&amp;nbsp;데이터의&amp;nbsp;비율을&amp;nbsp;변경 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;i&gt;&lt;b&gt;sampling_strategy&lt;/b&gt;&lt;/i&gt;&amp;nbsp;매개변수&amp;nbsp;&amp;rarr;&amp;nbsp;샘플링&amp;nbsp;후&amp;nbsp;다수&amp;nbsp;데이터&amp;nbsp;대비&amp;nbsp;소수&amp;nbsp;데이터의&amp;nbsp;비율(소수&amp;nbsp;/&amp;nbsp;다수)을&amp;nbsp;의미,&amp;nbsp;0.5면&amp;nbsp;다수&amp;nbsp;데이터가&amp;nbsp;소수&amp;nbsp;데이터의&amp;nbsp;2배&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778204041443&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;undersampler2 = RandomUnderSampler(sampling_strategy = 0.3)
under_x2, under_y2 = undersampler2.fit_resample(x, y)
Counter(under_y2)		# Counter({np.int64(0): 330, np.int64(1): 99})&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;오버 샘플링: RandomOverSampler()&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오버 샘플링&lt;/b&gt;: 소수&amp;nbsp;데이터를&amp;nbsp;다수&amp;nbsp;데이터&amp;nbsp;개수만큼&amp;nbsp;증가시켜&amp;nbsp;학습에&amp;nbsp;사용하기&amp;nbsp;위한&amp;nbsp;방법&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;랜덤 오버 샘플링&lt;/b&gt;: 소수의&amp;nbsp;데이터를&amp;nbsp;단순&amp;nbsp;복제하여&amp;nbsp;다수의&amp;nbsp;데이터와의&amp;nbsp;비율을&amp;nbsp;맞춰주는&amp;nbsp;과정&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778204633550&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from imblearn.over_sampling import RandomOverSampler
oversampler = RandomOverSampler()
over_x, over_y = oversampler.fit_resample(x, y)
Counter(over_y)		# Counter({np.int64(0): 901, np.int64(1): 901})&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1778204818455&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 소수의 데이터 비율을 다수의 반 정도로 샘플링

oversampler2 = RandomOverSampler(sampling_strategy=0.5)
over_x2, over_y2 = oversampler2.fit_resample(x, y)
Counter(over_y2)		# Counter({np.int64(0): 901, np.int64(1): 450})&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;오버 샘플링: SMOTE()&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;SMOTE&lt;/b&gt;:&amp;nbsp;소수&amp;nbsp;데이터의&amp;nbsp;관측값에&amp;nbsp;대한&amp;nbsp;K개의&amp;nbsp;최근접&amp;nbsp;양수를&amp;nbsp;이웃으로&amp;nbsp;찾고,&amp;nbsp;관측&amp;nbsp;값과&amp;nbsp;이웃으로&amp;nbsp;선택된&amp;nbsp;값&amp;nbsp;사이에&amp;nbsp;임의의&amp;nbsp;새로운&amp;nbsp;데이터를&amp;nbsp;생성하는&amp;nbsp;방법&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778204978471&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from imblearn.over_sampling import SMOTE
smote = SMOTE()
sm_x, sm_y = smote.fit_resample(x, y)
Counter(sm_y)		# Counter({np.int64(0): 901, np.int64(1): 901})&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;SMOTE 결과 시각화&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778205043442&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import matplotlib.pyplot as plt
import seaborn as sns

fig, axes = plt.subplots(2, 2, figsize=(15, 15))

# 4개의 산점도 그래프 생성
sns.scatterplot(
    x = x[:, 2], y = x[:, 3], ax = axes[0][0], hue = y, alpha = 0.4
)

sns.scatterplot(
    x = under_x[:, 2], y = under_x[:, 3], ax = axes[0][1], hue = under_y, alpha = 0.4
)

sns.scatterplot(
    x = over_x[:, 2], y = over_x[:, 3], ax = axes[1][0], hue = over_y, alpha = 0.4
)

sns.scatterplot(
    x = sm_x[:, 2], y = sm_x[:, 3], ax = axes[1][1], hue = sm_y, alpha = 0.4
)

axes[0][0].set_title('Origin Data')
axes[0][1].set_title('Random Under Sample')
axes[0][1].set_xlim(-3.5, 3.5)
axes[0][1].set_ylim(-4.5, 4.5)
axes[1][0].set_title('Random Over Sample')
axes[1][1].set_title('SMOTE')

plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리39.png&quot; data-origin-width=&quot;1212&quot; data-origin-height=&quot;1221&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/eEVUkA/dJMcabqBRdv/JflORmJAvc5vQz3bVPvctK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/eEVUkA/dJMcabqBRdv/JflORmJAvc5vQz3bVPvctK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/eEVUkA/dJMcabqBRdv/JflORmJAvc5vQz3bVPvctK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FeEVUkA%2FdJMcabqBRdv%2FJflORmJAvc5vQz3bVPvctK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;556&quot; height=&quot;560&quot; data-filename=&quot;티스토리39.png&quot; data-origin-width=&quot;1212&quot; data-origin-height=&quot;1221&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  데이터 분할 (Split)&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #333333; text-align: start;&quot;&gt;iris dataset을 활용한 데이터 분할 &lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리, 데이터셋 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778205161597&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris_data = load_iris()&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;데이터셋 전처리: 정리, 이상치 처리&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778205334787&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;iris = pd.DataFrame(iris_data['data'], columns = iris_data['feature_names'])
iris['class'] = iris_data['target']

# target 데이터들의 이름을 변환
iris['class'] = iris['class'].map(
    lambda x: iris_data['target_names'][x]
)

iris&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리40.png&quot; data-origin-width=&quot;628&quot; data-origin-height=&quot;363&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/uPxWw/dJMcadu6ne2/I9d6SnJbESVdQNIardwCnK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/uPxWw/dJMcadu6ne2/I9d6SnJbESVdQNIardwCnK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/uPxWw/dJMcadu6ne2/I9d6SnJbESVdQNIardwCnK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FuPxWw%2FdJMcadu6ne2%2FI9d6SnJbESVdQNIardwCnK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;346&quot; height=&quot;200&quot; data-filename=&quot;티스토리40.png&quot; data-origin-width=&quot;628&quot; data-origin-height=&quot;363&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 Split: train_test_split()&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778205835419&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = iris.drop('class', axis=1)
y = iris['class']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.3, random_state = 42
)

# 데이터의 개수를 확인
print(f'X_train: {X_train.shape}, X_test: {X_test.shape}')
	# X_train: (105, 4), X_test: (45, 4)
print(f'y_train: {y_train.shape}, y_test: {y_test.shape}')
	# y_train: (105,), y_test: (45,)
    
y_train.value_counts()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;212&quot; data-origin-height=&quot;107&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ck0uCa/dJMcaiQKq9o/yQfHlsMNCd807HPTOE9P90/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ck0uCa/dJMcaiQKq9o/yQfHlsMNCd807HPTOE9P90/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ck0uCa/dJMcaiQKq9o/yQfHlsMNCd807HPTOE9P90/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fck0uCa%2FdJMcaiQKq9o%2FyQfHlsMNCd807HPTOE9P90%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;212&quot; height=&quot;107&quot; data-origin-width=&quot;212&quot; data-origin-height=&quot;107&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;train_test_split()의 매개변수&lt;/span&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;test_size&lt;/b&gt;:&amp;nbsp;test&amp;nbsp;데이터의&amp;nbsp;비율&amp;nbsp;(0~1) &lt;br /&gt;&lt;b&gt;random_state&lt;/b&gt;:&amp;nbsp;임의의&amp;nbsp;데이터를&amp;nbsp;추출하는&amp;nbsp;과정에서&amp;nbsp;seed&amp;nbsp;값을&amp;nbsp;지정 &lt;br /&gt;&lt;b&gt;shuffle&lt;/b&gt;:&amp;nbsp;데이터를&amp;nbsp;섞을&amp;nbsp;것인가(시계열&amp;nbsp;데이터셋인&amp;nbsp;경우에는&amp;nbsp;False) &lt;br /&gt;&lt;b&gt;stratify&lt;/b&gt;:&amp;nbsp;특정&amp;nbsp;변수를&amp;nbsp;지정하면&amp;nbsp;해당&amp;nbsp;변수를&amp;nbsp;기준으로&amp;nbsp;계층화.&amp;nbsp;해당&amp;nbsp;변수의&amp;nbsp;비율을&amp;nbsp;유지하도록&amp;nbsp;데이터&amp;nbsp;분할&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778206046992&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X, y, test_size = 0.3, shuffle=False, random_state = 42
)

y_train2.value_counts()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;208&quot; data-origin-height=&quot;102&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/vYEuA/dJMcadPpBeK/pbVaMZKRiCOKvWcvjQf0l1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/vYEuA/dJMcadPpBeK/pbVaMZKRiCOKvWcvjQf0l1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/vYEuA/dJMcadPpBeK/pbVaMZKRiCOKvWcvjQf0l1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FvYEuA%2FdJMcadPpBeK%2FpbVaMZKRiCOKvWcvjQf0l1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;208&quot; height=&quot;102&quot; data-origin-width=&quot;208&quot; data-origin-height=&quot;102&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1778206166706&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X_train3, X_test3, y_train3, y_test3 = train_test_split(
    X, y, test_size = 0.3, stratify=y, random_state=42
)

y_train3.value_counts()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;212&quot; data-origin-height=&quot;102&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dDhS4t/dJMcajhOAIx/fUhjUu4cNDsrh9KMKKnBN0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dDhS4t/dJMcajhOAIx/fUhjUu4cNDsrh9KMKKnBN0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dDhS4t/dJMcajhOAIx/fUhjUu4cNDsrh9KMKKnBN0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdDhS4t%2FdJMcajhOAIx%2FfUhjUu4cNDsrh9KMKKnBN0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;212&quot; height=&quot;102&quot; data-origin-width=&quot;212&quot; data-origin-height=&quot;102&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;5월 8일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오늘의&lt;/span&gt;&lt;/b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&amp;nbsp;&lt;b&gt;소감&lt;/b&gt;:&amp;nbsp;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span&gt;&lt;b&gt;  데이터 Scaling&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터들의 최솟값과 최댓값, 평균, 표준편차를 출력하는 함수 생성&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778208017657&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;def scaler_print(train, test):
    print(f&quot;&quot;&quot;
    Train Data:
        Min : {round(train.min(), 2)}
        Max : {round(train.max(), 2)}
        Mean: {round(train.mean(), 2)}
        Std : {round(train.std(), 2)}
&quot;&quot;&quot;)

    print(f&quot;&quot;&quot;
    Test Data:
        Min : {round(test.min(), 2)}
        Max : {round(test.max(), 2)}
        Mean: {round(test.mean(), 2)}
        Std : {round(test.std(), 2)}
&quot;&quot;&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;Standard Scaler&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;평균이&amp;nbsp;0,&amp;nbsp;분산이&amp;nbsp;1이&amp;nbsp;되도록&amp;nbsp;스케일링&amp;nbsp;(표준화)&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778207960493&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sklearn.preprocessing import StandardScaler
stdscaler = StandardScaler()

X_train_sc = stdscaler.fit_transform(X_train3)
X_test_sc = stdscaler.transform(X_test3)
scaler_print(X_train_sc, X_test_sc)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;172&quot; data-origin-height=&quot;260&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bzhFNE/dJMcabKV0r2/z3ph52ccOI4kj4MMbg0Rpk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bzhFNE/dJMcabKV0r2/z3ph52ccOI4kj4MMbg0Rpk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bzhFNE/dJMcabKV0r2/z3ph52ccOI4kj4MMbg0Rpk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbzhFNE%2FdJMcabKV0r2%2Fz3ph52ccOI4kj4MMbg0Rpk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;114&quot; height=&quot;172&quot; data-origin-width=&quot;172&quot; data-origin-height=&quot;260&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;Min-Max Scaler&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778208589413&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sklearn.preprocessing import MinMaxScaler

mnscaler = MinMaxScaler()

X_train_sc = mnscaler.fit_transform(X_train3)
X_test_sc = mnscaler.transform(X_test3)

scaler_print(X_train_sc, X_test_sc)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;142&quot; data-origin-height=&quot;251&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/YnhP6/dJMcabxkGGe/jX2XkKjhXUyEI3OVxKgYg1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/YnhP6/dJMcabxkGGe/jX2XkKjhXUyEI3OVxKgYg1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/YnhP6/dJMcabxkGGe/jX2XkKjhXUyEI3OVxKgYg1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FYnhP6%2FdJMcabxkGGe%2FjX2XkKjhXUyEI3OVxKgYg1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;111&quot; height=&quot;196&quot; data-origin-width=&quot;142&quot; data-origin-height=&quot;251&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;Max Abs Scaler&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778208660324&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sklearn.preprocessing import MaxAbsScaler
mascaler = MaxAbsScaler()

X_train_sc = mascaler.fit_transform(X_train3)
X_test_sc = mascaler.transform(X_test3)

scaler_print(X_train_sc, X_test_sc)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;157&quot; data-origin-height=&quot;252&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/KGh53/dJMcagZHMFk/r3Mv3vLqNF9LRytLUjirWk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/KGh53/dJMcagZHMFk/r3Mv3vLqNF9LRytLUjirWk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/KGh53/dJMcagZHMFk/r3Mv3vLqNF9LRytLUjirWk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FKGh53%2FdJMcagZHMFk%2Fr3Mv3vLqNF9LRytLUjirWk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;120&quot; height=&quot;193&quot; data-origin-width=&quot;157&quot; data-origin-height=&quot;252&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;Robust Scaler&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;Robust Scaler&lt;/b&gt;: 평균과 분산을 이용한 Standard 대신에 중앙값과 사분위수를 활용하는 방식 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;중앙값을&amp;nbsp;0으로&amp;nbsp;설정,&amp;nbsp;IQR을&amp;nbsp;사용하여&amp;nbsp;이상치의&amp;nbsp;영향을&amp;nbsp;최소화 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;quantile_range&lt;/b&gt;&amp;nbsp;매개변수(기본값&amp;nbsp;(25.0,&amp;nbsp;75.0)):&amp;nbsp;더&amp;nbsp;넓거나&amp;nbsp;좁은&amp;nbsp;범위의&amp;nbsp;값을&amp;nbsp;이상치로&amp;nbsp;판단하게&amp;nbsp;할&amp;nbsp;수&amp;nbsp;있다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778213116039&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sklearn.preprocessing import RobustScaler
ruscaler = RobustScaler()
ruscaler2 = RobustScaler(quantile_range=(20, 80))

X_train_sc = ruscaler.fit_transform(X_train3)
X_test_sc = ruscaler.transform(X_test3)
scaler_print(X_train_sc, X_test_sc)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;152&quot; data-origin-height=&quot;257&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bl54hj/dJMcahj4Uch/KQRm01AkCa5L7mEURanjpK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bl54hj/dJMcahj4Uch/KQRm01AkCa5L7mEURanjpK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bl54hj/dJMcahj4Uch/KQRm01AkCa5L7mEURanjpK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fbl54hj%2FdJMcahj4Uch%2FKQRm01AkCa5L7mEURanjpK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;109&quot; height=&quot;184&quot; data-origin-width=&quot;152&quot; data-origin-height=&quot;257&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1778213243482&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X_train_sc = ruscaler2.fit_transform(X_train3)
X_test_sc = ruscaler2.transform(X_test3)
scaler_print(X_train_sc, X_test_sc)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;151&quot; data-origin-height=&quot;261&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bTEdDf/dJMcaaZvsf0/KKpdkKcIe5DhqeItt5Pw80/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bTEdDf/dJMcaaZvsf0/KKpdkKcIe5DhqeItt5Pw80/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bTEdDf/dJMcaaZvsf0/KKpdkKcIe5DhqeItt5Pw80/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbTEdDf%2FdJMcaaZvsf0%2FKKpdkKcIe5DhqeItt5Pw80%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;108&quot; height=&quot;187&quot; data-origin-width=&quot;151&quot; data-origin-height=&quot;261&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;inverse_transform&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;inverse_transform&lt;/b&gt;: 스케일링된 데이터를 원본 데이터로 복원&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1778213359454&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X_origin = ruscaler2.inverse_transform(X_train_sc)
pd.DataFrame(X_origin).head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;170&quot; data-origin-height=&quot;171&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bHuQE8/dJMcafmeSs4/w1ulpGZDyrekLbrsiCm4S0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bHuQE8/dJMcafmeSs4/w1ulpGZDyrekLbrsiCm4S0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bHuQE8/dJMcafmeSs4/w1ulpGZDyrekLbrsiCm4S0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbHuQE8%2FdJMcafmeSs4%2Fw1ulpGZDyrekLbrsiCm4S0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;129&quot; height=&quot;130&quot; data-origin-width=&quot;170&quot; data-origin-height=&quot;171&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  회귀 분석&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #333333; text-align: start;&quot;&gt;boston dataset을 활용한 회귀 분석&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778225194661&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error, \
    mean_squared_error, mean_squared_log_error, root_mean_squared_error
    
boston = pd.read_csv('../csv/boston.csv')
boston.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;777&quot; data-origin-height=&quot;172&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bFL6Ti/dJMcafNk02Y/ryvrq8k1IPl0ZT7cPkWPP1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bFL6Ti/dJMcafNk02Y/ryvrq8k1IPl0ZT7cPkWPP1/img.png&quot; data-alt=&quot;빨간색 박스 부분이 종속 변수이다.&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bFL6Ti/dJMcafNk02Y/ryvrq8k1IPl0ZT7cPkWPP1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbFL6Ti%2FdJMcafNk02Y%2Fryvrq8k1IPl0ZT7cPkWPP1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;777&quot; height=&quot;172&quot; data-origin-width=&quot;777&quot; data-origin-height=&quot;172&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;빨간색 박스 부분이 종속 변수이다.&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;독립변수, 종속변수 분할 후 데이터 분할&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778226476848&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = boston.drop(columns=['CHAS', 'Price'])
y = boston['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;모델 생성, 학습, 예측&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778226825299&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 모델 생성
lr = LinearRegression()

# 모델 학습
lr.fit(X_train, y_train)

# 예측
pred = lr.predict(X_test)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;모델 평가&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;성능 평가 지표&lt;/b&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;MAE&lt;/b&gt;:&amp;nbsp;MEAN(&amp;nbsp;|&amp;nbsp;실제값&amp;nbsp;-&amp;nbsp;예측값&amp;nbsp;|&amp;nbsp;) &lt;br /&gt;&lt;b&gt;MSE&lt;/b&gt;:&amp;nbsp;MEAN(&amp;nbsp;(&amp;nbsp;실제값-예측값&amp;nbsp;)&amp;sup2;&amp;nbsp;) &lt;br /&gt;&lt;b&gt;RMSE&lt;/b&gt;:&amp;nbsp;&amp;radic;&amp;nbsp;MSE &lt;br /&gt;&lt;b&gt;MSLE&lt;/b&gt;:&amp;nbsp;MEAN(&amp;nbsp;(&amp;nbsp;log(1+실제값)&amp;nbsp;-&amp;nbsp;log(1+예측값)&amp;nbsp;)&amp;sup2;&amp;nbsp;) &lt;br /&gt;&lt;b&gt;MAPE&lt;/b&gt;:&amp;nbsp;MEAN(&amp;nbsp;|&amp;nbsp;(실제값&amp;nbsp;-&amp;nbsp;예측값)&amp;nbsp;/&amp;nbsp;실제값&amp;nbsp;|&amp;nbsp;) &lt;br /&gt;&lt;b&gt;R&amp;sup2;&amp;nbsp;Score&lt;/b&gt;:&amp;nbsp;1&amp;nbsp;-&amp;nbsp;SSE&amp;nbsp;/&amp;nbsp;SST&lt;/p&gt;
&lt;pre id=&quot;code_1778227497866&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;mae = mean_absolute_error(y_test, pred)
mse = mean_squared_error(y_test, pred)
rmse = root_mean_squared_error(y_test, pred)
mape = mean_absolute_percentage_error(y_test, pred)
r2 = r2_score(y_test, pred)

print(f&quot;MAE: {round(mae, 2)}&quot;)				# 3.24
print(f&quot;MSE: {round(mse, 2)}&quot;)				# 24.64
print(f&quot;RMSE: {round(rmse, 2)}&quot;)			# 4.96
print(f&quot;MAPE: {round(mape, 2)*100}%&quot;)			# 17.0%
print(f&quot;R2 Score: {round(r2, 2)}&quot;)			# 0.66&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  분류 분석&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #333333; text-align: start;&quot;&gt;iris dataset을 활용한 분류 분석&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;라이브러리, 데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778228901630&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier, plot_tree
import pandas as pd

iris = pd.read_csv('../csv/iris.csv')
iris.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;477&quot; data-origin-height=&quot;182&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/Bs0GO/dJMcac35ltz/gRNUo0ZpgpDZwo5yKiUQY0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/Bs0GO/dJMcac35ltz/gRNUo0ZpgpDZwo5yKiUQY0/img.png&quot; data-alt=&quot;빨간색 박스 부분이 종속 변수이다.&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/Bs0GO/dJMcac35ltz/gRNUo0ZpgpDZwo5yKiUQY0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FBs0GO%2FdJMcac35ltz%2FgRNUo0ZpgpDZwo5yKiUQY0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;477&quot; height=&quot;182&quot; data-origin-width=&quot;477&quot; data-origin-height=&quot;182&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;빨간색 박스 부분이 종속 변수이다.&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt; &lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;독립변수, 종속변수 분할&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778228961602&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;X = iris.drop('species', axis=1)
y = iris['species']&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;모델 생성, 학습, 예측&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778229009252&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 모델 생성
clf = DecisionTreeClassifier(max_depth=3, random_state=42)

# 모델 학습
clf.fit(X, y)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;트리 구조 시각화&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778229044059&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;feature_names = X.columns

plt.figure(figsize=(15, 15))
plot_tree(clf, feature_names=feature_names, class_names=iris['species'].unique(), fontsize=10, filled=True)
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리41.png&quot; data-origin-width=&quot;1182&quot; data-origin-height=&quot;1175&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/caTc0H/dJMcaipJcxh/89j5PJuUH0qWOEr7ogPzE0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/caTc0H/dJMcaipJcxh/89j5PJuUH0qWOEr7ogPzE0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/caTc0H/dJMcaipJcxh/89j5PJuUH0qWOEr7ogPzE0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcaTc0H%2FdJMcaipJcxh%2F89j5PJuUH0qWOEr7ogPzE0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1182&quot; height=&quot;1175&quot; data-filename=&quot;티스토리41.png&quot; data-origin-width=&quot;1182&quot; data-origin-height=&quot;1175&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;train, test 데이터 분할 후 모델 적합, 예측, 평가&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778229137634&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 라이브러리 로드
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# 분류 모델이기에 계층화 적용
# split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 모델 적합
clf.fit(X_train, y_train)

# 예측
pred = clf.predict(X_test)

# 평가
accuracy_score(y_test, pred)		# 0.9666666666666667
confusion_matrix(y_test, pred)
# array([[10,  0,  0],
#        [ 0,  9,  1],
#        [ 0,  0, 10]])


# 시각화
plt.figure(figsize=(15, 15))
plot_tree(clf, feature_names=X.columns, class_names=iris['species'].unique(), fontsize=10, filled=True)
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리42.png&quot; data-origin-width=&quot;1182&quot; data-origin-height=&quot;1175&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/btlgQ2/dJMcacXkw7o/ooRejOv5IcnwXtdmSLXMc0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/btlgQ2/dJMcacXkw7o/ooRejOv5IcnwXtdmSLXMc0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/btlgQ2/dJMcacXkw7o/ooRejOv5IcnwXtdmSLXMc0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbtlgQ2%2FdJMcacXkw7o%2FooRejOv5IcnwXtdmSLXMc0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1182&quot; height=&quot;1175&quot; data-filename=&quot;티스토리42.png&quot; data-origin-width=&quot;1182&quot; data-origin-height=&quot;1175&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;color: #fe6b00; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;5주차 소감&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;연휴가 있어서 빠르게 지나갔던 5주차!&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;6주차부터는 본격적인 ML, DL, 자연어 처리를 하게 될 것을 생각하니 새삼 시간이 빠르다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;곧 빅분기 실기 접수도 있으니만큼, 이번에 공부하는 내용들로 sklearn에 대한 지식을 확고히 하자.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;color: #dddddd;&quot;&gt;&lt;s&gt;&lt;i&gt;저번 시험처럼 linear_model 생각 안 나서 모델 못 쓰지 말고&lt;/i&gt;&lt;/s&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;너무 자동완성에 의존하지 말고, 강사님 따라서 친다고도 생각하지 말고... 내가 내 코드를 건축하는 느낌으로 내 것으로 만들자!&lt;/span&gt;&lt;/p&gt;</description>
      <category>멀티캠퍼스부트캠프</category>
      <category>데이터 분석가 부트캠프</category>
      <category>멀티캠퍼스부트캠프</category>
      <category>부트캠프</category>
      <author>가라어퍼</author>
      <guid isPermaLink="true">https://bbgw-oshoulder.tistory.com/5</guid>
      <comments>https://bbgw-oshoulder.tistory.com/5#entry5comment</comments>
      <pubDate>Fri, 8 May 2026 18:07:36 +0900</pubDate>
    </item>
    <item>
      <title>4주차 Note: 데이터 시각화</title>
      <link>https://bbgw-oshoulder.tistory.com/4</link>
      <description>&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;멀캠4.jpg&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ozMVm/dJMcaffqCBM/8ogTwUJLhrsrNai6jGVEhk/img.jpg&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ozMVm/dJMcaffqCBM/8ogTwUJLhrsrNai6jGVEhk/img.jpg&quot; data-alt=&quot;똥손이슈 반짝눈 대실패&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ozMVm/dJMcaffqCBM/8ogTwUJLhrsrNai6jGVEhk/img.jpg&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FozMVm%2FdJMcaffqCBM%2F8ogTwUJLhrsrNai6jGVEhk%2Fimg.jpg&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;497&quot; height=&quot;265&quot; data-filename=&quot;멀캠4.jpg&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;똥손이슈 반짝눈 대실패&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #666666; font-family: 'Noto Serif KR';&quot;&gt;&lt;b&gt; ️ 4주차: 4월 27일 - 4월 30일&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;멀티캠퍼스&lt;/span&gt; 부트캠프 4주차 요약✍️&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 4/27 ] &lt;b&gt;Web&lt;/b&gt;: Flask&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 4/28 ] &lt;b&gt;웹, 데이터 시각화&lt;/b&gt;: PythonAnywhere, matplotlib, drinks 데이터셋 활용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 4/29 ]&amp;nbsp;&lt;b&gt;데이터 시각화&lt;/b&gt;: offline, online, aapl 데이터셋 활용&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 4/30 ]&amp;nbsp;&lt;b&gt;데이터 시각화&lt;/b&gt;: tableau, GA&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[ 5/1 ] &lt;span style=&quot;color: #fe6b00;&quot;&gt;근로자의 날 휴강&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;4월 27일  &amp;zwj; &lt;/b&gt;&lt;/span&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의 소감&lt;/b&gt;: 코드가 길어지면 어지럽기는 마찬가지...  py js html 문법 통일해줬으면  (그럴리가...)&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;본격적인 시각화 툴을 다루기 전, 시각화 결과를 구현할 수 있는 웹을 구축하는 방법을 배웠다.&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style3&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘 사용한 파일들&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;▪️&lt;b&gt;app.py&lt;/b&gt; (이번 시간에 새로 생성한 웹 구축 모듈)&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;▪️&lt;b&gt;db.py&lt;/b&gt; (4/20일에 만들어둔 sql 연결 모듈)&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;▪️&lt;b&gt;index.html, main.html&lt;/b&gt; (웹 구현 html 코드)&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1777279704246&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# flask 프레임워크 안에서 특정 기능을 로드
from flask import Flask, render_template, request, redirect, url_for
# render_template &amp;rarr; templates 폴더 안에 html 문서를 가져오기 위한 기능
# request &amp;rarr; 유저가 보낸 데이터에 접근하기 위한 기능
# redirect &amp;rarr; 특정 주소로 이동
# url_for &amp;rarr; 특정 주소를 지정하기 위한 기능
    # (상대 경로 지정 &amp;rarr; app.py 위치로부터 경로를 설정)
    # html 문서 안에서 사용 (render_template() 함수를 이용해서 app.py에서 실행)

from db import MyDB
from querys import user
import pandas as pd&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #666666;&quot;&gt;환경 구축&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777298771851&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# Flask class 생성 &amp;rarr; 웹 서버를 구축하는 기능
# class 생성 시 생성자 함수 호출
    # 필수 인자 1개 &amp;rarr; 현재 실행이 되는 파일의 이름(app.py)
    # 파일의 이름을 그대로 사용할 시 파일의 이름이 변경될 때 매번 수정 작업 필요
    # __name__ &amp;rarr; 현재 모듈의 이름 (직접 실행한 파일에서는 &quot;__main__&quot;)
    
app = Flask(__name__)
db = MyDB()
db.sql_query(user.create_query)
db.commit()&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1777299320963&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 회원 테이블 생성 쿼리문 하나 작성
create_query = &quot;&quot;&quot;
    CREATE TABLE IF NOT EXISTS `user` (
        `id` varchar(32) primary key,
        `password` varchar(32) not null,
        `name` varchar(32)
    )
&quot;&quot;&quot;

# 로그인 조회 쿼리문 작성
# id, password를 이용하여 데이터가 존재하는가?
login_query = &quot;&quot;&quot;
    SELECT *
    FROM `user`
    WHERE `id` = %s AND `password` = %s
&quot;&quot;&quot;

# 회원 가입 쿼리문
    # 아이디 중복 체크
    # id, password, name 입력하고 회원가입
id_check_query = &quot;&quot;&quot;
    SELECT *
    FROM `user`
    WHERE `id` = %s
&quot;&quot;&quot;

signup_query = &quot;&quot;&quot;
    INSERT INTO `user` VALUES (%s, %s, %s)
&quot;&quot;&quot;&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #666666;&quot;&gt;API 생성 1: index()&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1777298901406&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 웹 서버의 api 목록들 생성
# 데코레이터(내비게이션 함수) &amp;rarr; @함수
    # 특정 주소와 함수를 연결
    # 주소 &amp;rarr; base_url(127.0.0.1:5000) + sub_url()
        # 이 주소로 요청이 들어왔을 때 함수를 호출
        
@app.route(&quot;/&quot;)
def index():
    # HTML로 하이퍼 링크 생성
    # return &quot;&amp;lt;a href='https://www.google.com'&amp;gt;Google&amp;lt;/a&amp;gt;&quot;
    return render_template(&quot;index.html&quot;)
    # templates 폴더 안에 html 문서를 불러와서 되돌려준다.(render_template())
    # render_template에서 {{변수명}}은 python에서 넘겨준 변수의 값을 그 자리에 담겠다는 의미
    # {% python code %} &amp;rarr; code를 인식하여 조건에 맞는 경우에만 html을 추가
        # 반복문을 이용해서 html을 반복적으로 추가&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;index.html&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777299802352&quot; class=&quot;html xml&quot; data-ke-language=&quot;html&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&quot;en&quot;&amp;gt;
    &amp;lt;head&amp;gt;
        &amp;lt;meta charset=&quot;UTF-8&quot;&amp;gt;
        &amp;lt;meta name=&quot;viewport&quot; content=&quot;width=device-width, initial-scale=1.0&quot;&amp;gt;
        &amp;lt;!-- title 태그는 탭에 나타나는 이름 --&amp;gt;
        &amp;lt;title&amp;gt;Document&amp;lt;/title&amp;gt;
    &amp;lt;/head&amp;gt;
    &amp;lt;body&amp;gt;
        &amp;lt;!-- 실제 화면에 나타나는 부분 --&amp;gt;
         &amp;lt;h1&amp;gt;Hello Flask&amp;lt;/h1&amp;gt;
         &amp;lt;p&amp;gt;이 페이지는 메인 페이지입니다.&amp;lt;/p&amp;gt;
        &amp;lt;!-- 유저가 입력할 수 있는 공간 생성(input) --&amp;gt;
        &amp;lt;!-- 입력한 데이터를 서버에게 보낸다(form) --&amp;gt;
        &amp;lt;!-- form 태그: action 속성-어디로 데이터를 보낼 것인가?(주소값) --&amp;gt;
        &amp;lt;!-- method 속성-어떤 방식으로 데이터를 보낼 것인가? (get/post) --&amp;gt;
        &amp;lt;!-- 웹 서버의 데이터를 key: value 형태로 보낸다. --&amp;gt;
        &amp;lt;!-- 뒤에 나올 login의 login 과정 --&amp;gt;
        &amp;lt;form action=&quot;/login&quot; method=&quot;get&quot;&amp;gt;
            &amp;lt;!-- 유저의 id 입력하는 공간 --&amp;gt;
            &amp;lt;label&amp;gt;ID&amp;lt;/label&amp;gt;
            &amp;lt;input type=&quot;text&quot; name='input_id'&amp;gt;
            &amp;lt;br&amp;gt;
            &amp;lt;!-- 유저의 password 입력하는 공간 --&amp;gt;
            &amp;lt;label&amp;gt;PASSWORD&amp;lt;/label&amp;gt;
            &amp;lt;input type=&quot;password&quot; name=&quot;input_pass&quot;&amp;gt;
            &amp;lt;br&amp;gt;
            &amp;lt;!-- 데이터를 보낸다 &amp;rarr; 제출 버튼이 필요 --&amp;gt;
            &amp;lt;input type=&quot;submit&quot;&amp;gt;
        &amp;lt;/form&amp;gt;
        &amp;lt;h3&amp;gt;post 방식으로 데이터 보내기 &amp;lt;/h3&amp;gt;
        &amp;lt;!-- 뒤에 나올 login2의 login 과정 --&amp;gt;
        &amp;lt;!-- post 방식으로 데이터를 보낸다. --&amp;gt;
        &amp;lt;form action=&quot;/login2&quot; method = &quot;post&quot;&amp;gt;
            &amp;lt;label&amp;gt;ID&amp;lt;/label&amp;gt;
            &amp;lt;input type=&quot;text&quot; name='input_id'&amp;gt;
            &amp;lt;br&amp;gt;
            &amp;lt;label&amp;gt;PASSWORD&amp;lt;/label&amp;gt;
            &amp;lt;input type=&quot;password&quot; name=&quot;input_pass&quot;&amp;gt;
            &amp;lt;br&amp;gt;
            &amp;lt;input type=&quot;submit&quot;&amp;gt;
        &amp;lt;/form&amp;gt;
    &amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #666666;&quot;&gt;API 생성 2: login 1&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777298956359&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# /login 주소를 생성 (get 방식)

@app.route('/login')
def login():
    # 유저가 보낸 데이터를 받아온다. (flask 프레임워크에서 기능을 불러와서 사용)
    print(request.args)
    print(&quot;유저가 입력한 ID: &quot;, request.args['input_id'])
    print(&quot;유저가 입력한 password : &quot;, request.args['input_pass'])
    # DataBase server에 해당하는 아이디, 패스워드가 모두 일치하는 데이터가 존재하는가?
    # 조건문을 이용하여 로그인 성공/실패
    if request.args['input_id'] == 'admin' and request.args['input_pass'] == '1234':
        #로그인 성공
        return &quot;로그인 성공&quot;
    else:
        return &quot;ID, PASSWORD를 확인해주세요&quot;&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이 방식은 주소창에 ID와 PASSWORD가 노출되므로 이후 과정에서 사용하지 않는다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #666666;&quot;&gt;API 생성 3: login 2&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777299140553&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# /login2 주소를 생성 (post 방식)
# post 방식은 데이터를 숨겨서 보내고,
    # 웹 브라우저에서 주소창에 입력하는 방식으로 확인이 불가능
    
@app.route('/login2', methods=['post'])
def login2():
    print(request.form)
    _id = request.form['input_id']
    _pass = request.form['input_pass']
    sql_result = db.sql_query(user.login_query, _id, _pass)
    db.commit()
    print(sql_result)
    
    if sql_result:
        # 로그인한 계정의 이름을 화면에 표시 
        sql_name = sql_result[0]['name']
        df = pd.read_csv(&quot;../csv/AAPL.csv&quot;)
        df2 = df.tail(10)
        x_data = df2['Date'].tolist()
            # Series는 html에서 사용 불가능
        y_data = df2['Adj Close'].tolist()
        th_data = df.columns.tolist()
        td_data = df2.to_dict('records')
        return render_template('main.html',
                               name = sql_name,
                               x = x_data,
                               y = y_data,
                               th_data = th_data,
                               td_data = td_data)
    else:
        return redirect('/')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;main.html&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777300363601&quot; class=&quot;html xml&quot; data-ke-language=&quot;html&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&quot;en&quot;&amp;gt;
    &amp;lt;head&amp;gt;
        &amp;lt;meta charset=&quot;UTF-8&quot;&amp;gt;
        &amp;lt;meta name=&quot;viewport&quot; content=&quot;width=device-width, initial-scale=1.0&quot;&amp;gt;
        &amp;lt;title&amp;gt;Document&amp;lt;/title&amp;gt;
    &amp;lt;/head&amp;gt;
    &amp;lt;body&amp;gt;
        &amp;lt;h1&amp;gt;Main Page&amp;lt;/h1&amp;gt;
        &amp;lt;!-- 로그인한 유저의 이름을 대입 --&amp;gt;
        &amp;lt;p&amp;gt;{{name}}님 안녕하세요!&amp;lt;/p&amp;gt;

        &amp;lt;!-- 그래프가 그려지는 영역을 먼저 선택 --&amp;gt;
        &amp;lt;div&amp;gt;
            &amp;lt;canvas id=&quot;myChart&quot;&amp;gt;&amp;lt;/canvas&amp;gt;
        &amp;lt;/div&amp;gt;

        &amp;lt;!-- 외부의 라이브러리를 호출 --&amp;gt;
        &amp;lt;script src=&quot;https://cdn.jsdelivr.net/npm/chart.js&quot;&amp;gt;&amp;lt;/script&amp;gt;

        &amp;lt;!-- 실제 그래프를 canvas에 그려주는 부분 --&amp;gt;
        &amp;lt;script&amp;gt;
            // id가 myChart인 태그를 선택해서 상수 ctx에 대입
            // 변수는 let이나 var를 이용하여 변수를 생성
          const ctx = document.getElementById('myChart');

        //   Chart class를 생성 &amp;rarr; 그래프를 그려주는 부분
        new Chart(
            ctx,
            {
            type: 'line',
            data: {
            labels: {{x | tojson}},
            datasets: [{
                label: '# of Votes',
                data: {{y | tojson}},
                borderWidth: 1
            }]
         }
      });
    &amp;lt;/script&amp;gt;

    &amp;lt;!-- 표를 생성 --&amp;gt;
    &amp;lt;!-- table 태그, tr 태그: 행을 표시, th/td 태그: 열을 표시 --&amp;gt;
    &amp;lt;table border = 1&amp;gt;
        &amp;lt;tr&amp;gt;
            &amp;lt;!--
                반복문을 이용해서 작업을 하면 짧고 유동성 있게 사용 가능
                python에서 for문은 들여쓰기를 요함 
             --&amp;gt;
            &amp;lt;!-- 그래서 필요한 반복문의 시작과 --&amp;gt;
            {% for col in th_data%}
                &amp;lt;th&amp;gt;{{col}}&amp;lt;/th&amp;gt;
            &amp;lt;!-- 반복문의 끝 --&amp;gt;
            {% endfor %}
        &amp;lt;/tr&amp;gt;


        {% for row in td_data %}
        &amp;lt;tr&amp;gt;
            {% for col in th_data %}
                &amp;lt;td&amp;gt;{{row[col]}}&amp;lt;/td&amp;gt;
            {% endfor %}
        &amp;lt;/tr&amp;gt;
        {% endfor %}
    &amp;lt;/table&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #666666;&quot;&gt;API 생성 4: html 실행&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777299263473&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;@app.route('/dashboard')
def dashboard():
    return render_template('dashboard.html')

# 웹 서버를 시작한다. (구동한다.)
app.run(debug=True)
	# debug 매개변수 &amp;rarr; 디버그 모드를 사용할 것인가? (기본값 False)
    # True 변경 시 개발 모드로 변경 &amp;rarr; 파일이 수정됐을 때 서버가 재시작&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h2 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size26&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;b&gt;4월 28일  &amp;zwj; &lt;/b&gt;&lt;/span&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;/span&gt;&lt;/span&gt;&lt;/h2&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의 소감&lt;/b&gt;: 드디어 시각화 시작!! 정신 단단히 차리자 &lt;span style=&quot;background-color: #ffffff; color: #000000; text-align: left;&quot;&gt;- ̗̀( ˶'ᵕ'˶) ̖́-&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  html&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오전 시간에는 어제 했던 app.py 파일의 html을 업그레이드하는 작업을 수행하였다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  시각화&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;matplotlib 활용&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;라이브러리 로드&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777351116209&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 현재 컴퓨터의 os를 확인하기 위한 라이브러리 로드
import platform&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #666666; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;라인 그래프 생성&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;plt.plot&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;인자를 1개만 입력하면 y축에 데이터 입력, 이때 x축은 0부터 1씩 증가&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;인자가 2개인 경우 첫번째 인자가 x축, 두번째 인자가 y축의 데이터&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이때 두 인자의 길이가 같아야 한다.&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;ipynb의 경우 plt.show를 사용하지 않아도 그래프가 출력됨&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1777351196024&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.plot( [5,4,3,2,1] )
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;546&quot; data-origin-height=&quot;413&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/MkouF/dJMcah5jlCb/eg1KWHNJn3ZK0t1JVm67k1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/MkouF/dJMcah5jlCb/eg1KWHNJn3ZK0t1JVm67k1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/MkouF/dJMcah5jlCb/eg1KWHNJn3ZK0t1JVm67k1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FMkouF%2FdJMcah5jlCb%2Feg1KWHNJn3ZK0t1JVm67k1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;275&quot; height=&quot;208&quot; data-origin-width=&quot;546&quot; data-origin-height=&quot;413&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;선의 종류와 색깔 지정&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777355172592&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;x = [1, 2, 3, 4]
y = [70, 80, 100, 60]

plt.plot(x, y, '.-')
plt.show()
plt.plot(x, y, 'r')
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;387&quot; data-origin-height=&quot;612&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/nhlQq/dJMcaiiOYc1/KPgOQykSIUOJDxIG0Sb5QK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/nhlQq/dJMcaiiOYc1/KPgOQykSIUOJDxIG0Sb5QK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/nhlQq/dJMcaiiOYc1/KPgOQykSIUOJDxIG0Sb5QK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FnhlQq%2FdJMcaiiOYc1%2FKPgOQykSIUOJDxIG0Sb5QK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;236&quot; height=&quot;373&quot; data-origin-width=&quot;387&quot; data-origin-height=&quot;612&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그래프 두 개 동시에 그리기&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777357357963&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.plot(x, y)
plt.plot(x, [100, 60, 80, 40], 'r--')
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;393&quot; data-origin-height=&quot;296&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/csxoI3/dJMcahc9JWc/1D5dUKnstbJa2Q5SYKKZhK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/csxoI3/dJMcahc9JWc/1D5dUKnstbJa2Q5SYKKZhK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/csxoI3/dJMcahc9JWc/1D5dUKnstbJa2Q5SYKKZhK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcsxoI3%2FdJMcahc9JWc%2F1D5dUKnstbJa2Q5SYKKZhK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;300&quot; height=&quot;226&quot; data-origin-width=&quot;393&quot; data-origin-height=&quot;296&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;제목, x축, y축 이름 지정&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;matplotlib 기본 폰트는 한글을 지원하지 않음&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;따라서 폰트 설정을 변경한 후에 제목, 축 이름에 한글을 사용할 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이때 platform lib을 사용한다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777357581281&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;if platform.system() == 'Windows':
    plt.rc('font', family='Malgun Gothic')
else:
    plt.rc('font', family='AppleGothic')

plt.plot(x, y)
plt.title('그래프 제목')
plt.xlabel('x축 이름')
plt.ylabel('y축 이름')

plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;401&quot; data-origin-height=&quot;325&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/N40Ls/dJMcaiC7ufK/EwnpKWalnMynPs2LtgRStK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/N40Ls/dJMcaiC7ufK/EwnpKWalnMynPs2LtgRStK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/N40Ls/dJMcaiC7ufK/EwnpKWalnMynPs2LtgRStK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FN40Ls%2FdJMcaiC7ufK%2FEwnpKWalnMynPs2LtgRStK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;275&quot; height=&quot;223&quot; data-origin-width=&quot;401&quot; data-origin-height=&quot;325&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;마이너스 기호의 깨짐방지&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777359161782&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 둘이 같은 기능
plt.rc('axes', unicode_minus = False)
# plt.rcParams['axes.unicode_minus'] = False

plt.plot(x, [-10, 10, -20, 0])
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;560&quot; data-origin-height=&quot;422&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cRFwAF/dJMcafTXweT/yKe5YlQarFwh2mvP6UXYf1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cRFwAF/dJMcafTXweT/yKe5YlQarFwh2mvP6UXYf1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cRFwAF/dJMcafTXweT/yKe5YlQarFwh2mvP6UXYf1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcRFwAF%2FdJMcafTXweT%2FyKe5YlQarFwh2mvP6UXYf1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;330&quot; height=&quot;249&quot; data-origin-width=&quot;560&quot; data-origin-height=&quot;422&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;범례&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;하나의&amp;nbsp;영역에서&amp;nbsp;데이터가&amp;nbsp;2개&amp;nbsp;이상인&amp;nbsp;경우&amp;nbsp;&amp;rarr;&amp;nbsp;각각의&amp;nbsp;데이터들이&amp;nbsp;어떤&amp;nbsp;데이터인지&amp;nbsp;의미를&amp;nbsp;알아야&amp;nbsp;한다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777360110330&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;x = [1,2,3]
y1 = [10, 6, 3]
y2 = [5, 8, 17]

# plt.plot(x, y1, label='A')
# plt.plot(x, y2, label='B')
# plt.legend()

plt.plot(x, y1)
plt.plot(x, y2)
plt.legend(['a', 'b'], loc='upper center')&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;552&quot; data-origin-height=&quot;426&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/b8CiWS/dJMcadogjKz/FGqKUwImUc0QFIRvMRLQsK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/b8CiWS/dJMcadogjKz/FGqKUwImUc0QFIRvMRLQsK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/b8CiWS/dJMcadogjKz/FGqKUwImUc0QFIRvMRLQsK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb8CiWS%2FdJMcadogjKz%2FFGqKUwImUc0QFIRvMRLQsK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;355&quot; height=&quot;274&quot; data-origin-width=&quot;552&quot; data-origin-height=&quot;426&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;바 그래프 생성&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777360162346&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 수직
plt.bar(x, y1)
plt.show()

# 수평
plt.barh(x, y1)
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imagegridblock&quot;&gt;
  &lt;div class=&quot;image-container&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/zQAjo/dJMcajoqLz6/oE9rKUqpGyohmTthRDo8hk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/zQAjo/dJMcajoqLz6/oE9rKUqpGyohmTthRDo8hk/img.png&quot; data-origin-width=&quot;561&quot; data-origin-height=&quot;423&quot; data-is-animation=&quot;false&quot; style=&quot;width: 49.712%; margin-right: 10px;&quot; data-widthpercent=&quot;50.3&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/zQAjo/dJMcajoqLz6/oE9rKUqpGyohmTthRDo8hk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FzQAjo%2FdJMcajoqLz6%2FoE9rKUqpGyohmTthRDo8hk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;561&quot; height=&quot;423&quot;/&gt;&lt;/span&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cS6HEC/dJMcaaE8hfd/u7SWmwLfaSat2V8Umqf9M0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cS6HEC/dJMcaaE8hfd/u7SWmwLfaSat2V8Umqf9M0/img.png&quot; data-origin-width=&quot;557&quot; data-origin-height=&quot;425&quot; data-is-animation=&quot;false&quot; style=&quot;width: 49.1252%;&quot; data-widthpercent=&quot;49.7&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cS6HEC/dJMcaaE8hfd/u7SWmwLfaSat2V8Umqf9M0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcS6HEC%2FdJMcaaE8hfd%2Fu7SWmwLfaSat2V8Umqf9M0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;557&quot; height=&quot;425&quot;/&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;막대의 색상과 너비를 지정&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777361095244&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.bar(x, y1, width=0.4, color=['red', 'yellow', 'green'])
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;553&quot; data-origin-height=&quot;421&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/pBqus/dJMcadPiNxS/e1UNK8EvqKWvbIAApFToTk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/pBqus/dJMcadPiNxS/e1UNK8EvqKWvbIAApFToTk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/pBqus/dJMcadPiNxS/e1UNK8EvqKWvbIAApFToTk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FpBqus%2FdJMcadPiNxS%2Fe1UNK8EvqKWvbIAApFToTk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;302&quot; height=&quot;230&quot; data-origin-width=&quot;553&quot; data-origin-height=&quot;421&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;특정 막대의 색상을 변경&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777361177302&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;bar_graph = plt.bar(x, y1)
bar_graph[1].set_color(&quot;#FE6B00&quot;)

plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;553&quot; data-origin-height=&quot;428&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bW4jbL/dJMcagk0e6L/N6KDhpkLe3wKcQf2PRWCP1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bW4jbL/dJMcagk0e6L/N6KDhpkLe3wKcQf2PRWCP1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bW4jbL/dJMcagk0e6L/N6KDhpkLe3wKcQf2PRWCP1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbW4jbL%2FdJMcagk0e6L%2FN6KDhpkLe3wKcQf2PRWCP1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;362&quot; height=&quot;280&quot; data-origin-width=&quot;553&quot; data-origin-height=&quot;428&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;105&quot; data-origin-height=&quot;27&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/kx8kT/dJMcaib5S6x/Pr1A3qiS0sauPCL9rijKr1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/kx8kT/dJMcaib5S6x/Pr1A3qiS0sauPCL9rijKr1/img.png&quot; data-alt=&quot;vscode는 색상 지정이 편하다 &quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/kx8kT/dJMcaib5S6x/Pr1A3qiS0sauPCL9rijKr1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fkx8kT%2FdJMcaib5S6x%2FPr1A3qiS0sauPCL9rijKr1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;105&quot; height=&quot;27&quot; data-origin-width=&quot;105&quot; data-origin-height=&quot;27&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;vscode는 색상 지정이 편하다 &lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;산점도 그래프 생성&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777361379377&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;x = np.random.random(100)
y = np.random.random(100)

plt.scatter(x, y)
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;555&quot; data-origin-height=&quot;422&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/AeNHj/dJMcabjKab7/WRa4rEUWzJG6ak2VlOnOmK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/AeNHj/dJMcabjKab7/WRa4rEUWzJG6ak2VlOnOmK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/AeNHj/dJMcabjKab7/WRa4rEUWzJG6ak2VlOnOmK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FAeNHj%2FdJMcabjKab7%2FWRa4rEUWzJG6ak2VlOnOmK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;345&quot; height=&quot;262&quot; data-origin-width=&quot;555&quot; data-origin-height=&quot;422&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  drinks 데이터셋을 통한 여러가지 시각화&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;데이터 로드, 확인&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777364753912&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df = pd.read_csv('../csv/drinks.csv')
df.head()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;722&quot; data-origin-height=&quot;172&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/BsIBG/dJMb990wZRU/GkdBE9kOiS460pKdjl66y1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/BsIBG/dJMb990wZRU/GkdBE9kOiS460pKdjl66y1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/BsIBG/dJMb990wZRU/GkdBE9kOiS460pKdjl66y1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FBsIBG%2FdJMb990wZRU%2FGkdBE9kOiS460pKdjl66y1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;529&quot; height=&quot;126&quot; data-origin-width=&quot;722&quot; data-origin-height=&quot;172&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;pre id=&quot;code_1777364810303&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df.info()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;472&quot; data-origin-height=&quot;260&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/co81kN/dJMcaarAjV1/cwnB4UZX2R3Hztpu7ibwbK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/co81kN/dJMcaarAjV1/cwnB4UZX2R3Hztpu7ibwbK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/co81kN/dJMcaarAjV1/cwnB4UZX2R3Hztpu7ibwbK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fco81kN%2FdJMcaarAjV1%2FcwnB4UZX2R3Hztpu7ibwbK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;310&quot; height=&quot;171&quot; data-origin-width=&quot;472&quot; data-origin-height=&quot;260&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;결측치 확인, 대체&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777364885723&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;flag = df['continent'].isna()
df.loc[flag, ]

# 모든 결측치를 'OT'로 대체
df.fillna('OT', inplace=True)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imagegridblock&quot;&gt;
  &lt;div class=&quot;image-container&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/S6czB/dJMcagegrWH/NiFgtBzzIuxF7rPBIIK4Zk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/S6czB/dJMcagegrWH/NiFgtBzzIuxF7rPBIIK4Zk/img.png&quot; width=&quot;263&quot; height=&quot;205&quot; data-origin-width=&quot;843&quot; data-origin-height=&quot;656&quot; data-is-animation=&quot;false&quot; style=&quot;width: 38.6317%; margin-right: 10px;&quot; data-widthpercent=&quot;39.09&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/S6czB/dJMcagegrWH/NiFgtBzzIuxF7rPBIIK4Zk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FS6czB%2FdJMcagegrWH%2FNiFgtBzzIuxF7rPBIIK4Zk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;843&quot; height=&quot;656&quot;/&gt;&lt;/span&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bRI0wE/dJMcaf0GGUc/CMkpU6BxU6VPStJ6olcPs1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bRI0wE/dJMcaf0GGUc/CMkpU6BxU6VPStJ6olcPs1/img.png&quot; data-origin-width=&quot;741&quot; data-origin-height=&quot;370&quot; data-is-animation=&quot;false&quot; style=&quot;width: 60.2055%;&quot; data-widthpercent=&quot;60.91&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bRI0wE/dJMcaf0GGUc/CMkpU6BxU6VPStJ6olcPs1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbRI0wE%2FdJMcaf0GGUc%2FCMkpU6BxU6VPStJ6olcPs1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;741&quot; 
height=&quot;370&quot;/&gt;&lt;/span&gt;&lt;/div&gt;
  &lt;figcaption&gt;왼: isna(), 오른: fillna('OT')&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;데이터간 상관계수 확인&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777365165407&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 문자열 데이터 drop
df2 = df.drop(columns=['country', 'continent'])
df2.corr()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;711&quot; data-origin-height=&quot;143&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/SoXPW/dJMcabcUxpQ/IAuyZ40bgSqxCgVkY7RLvk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/SoXPW/dJMcabcUxpQ/IAuyZ40bgSqxCgVkY7RLvk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/SoXPW/dJMcabcUxpQ/IAuyZ40bgSqxCgVkY7RLvk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FSoXPW%2FdJMcabcUxpQ%2FIAuyZ40bgSqxCgVkY7RLvk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;507&quot; height=&quot;102&quot; data-origin-width=&quot;711&quot; data-origin-height=&quot;143&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;산점도 그려보기&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777365285627&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;x = df['beer_servings']
y = df['total_litres_of_pure_alcohol']
plt.scatter(x, y)
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;558&quot; data-origin-height=&quot;423&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/biaR82/dJMcaiXol0e/5pyvkAFVT6t2ixRMFZK9sk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/biaR82/dJMcaiXol0e/5pyvkAFVT6t2ixRMFZK9sk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/biaR82/dJMcaiXol0e/5pyvkAFVT6t2ixRMFZK9sk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbiaR82%2FdJMcaiXol0e%2F5pyvkAFVT6t2ixRMFZK9sk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;334&quot; height=&quot;253&quot; data-origin-width=&quot;558&quot; data-origin-height=&quot;423&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;heatmap&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777365814346&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import seaborn as sns

corr = df.iloc[:, 1:5].corr()

plt.figure(figsize=(12, 8))
sns.heatmap(data=corr,
            annot=True,
            square=True,
            fmt='0.3f',
            annot_kws={'size': 10},
            cbar = True,
            cmap = 'Blues',
            xticklabels=['맥주', '위스키', '와인', '순수알콜'],
            yticklabels=['맥주', '위스키', '와인', '순수알콜']
            )

# annot: 주석 표시 유무
# square: 히트맵 정사각형 표시 유무
# fmt: 주석의 소수점 자릿수를 지정
# annot_kws = 주석의 환경 설정 (size: 글자의 크기를 변경)
# cbar = 사이드바 표시여부&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;781&quot; data-origin-height=&quot;685&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bUEfsW/dJMcafGqzaW/ld4RYWBdTri1BH9kfmm4Mk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bUEfsW/dJMcafGqzaW/ld4RYWBdTri1BH9kfmm4Mk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bUEfsW/dJMcafGqzaW/ld4RYWBdTri1BH9kfmm4Mk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbUEfsW%2FdJMcafGqzaW%2Fld4RYWBdTri1BH9kfmm4Mk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;227&quot; height=&quot;199&quot; data-origin-width=&quot;781&quot; data-origin-height=&quot;685&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;pairplot&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777365912400&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;sns.pairplot(
    df.loc[:, '맥주소비량':'순수알콜'],
    corner = True,              # 중복이 되는 산점도 그래프 제거
    kind = 'reg',               # 추세선 추가
    plot_kws = {
        'line_kws' : {
            'color' : 'red'     # 추세선의 색상을 변경
        }
    }
)

plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;813&quot; data-origin-height=&quot;812&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ba2QJM/dJMcabD039Q/RNI8ONEo9eLn2cTr6dWT20/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ba2QJM/dJMcabD039Q/RNI8ONEo9eLn2cTr6dWT20/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ba2QJM/dJMcabD039Q/RNI8ONEo9eLn2cTr6dWT20/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fba2QJM%2FdJMcabD039Q%2FRNI8ONEo9eLn2cTr6dWT20%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;308&quot; height=&quot;308&quot; data-origin-width=&quot;813&quot; data-origin-height=&quot;812&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;연습 문제&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;1.&amp;nbsp;컬럼의&amp;nbsp;이름들을&amp;nbsp;변경 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;2.&amp;nbsp;파생변수&amp;nbsp;&amp;rarr;&amp;nbsp;총&amp;nbsp;술&amp;nbsp;소비량:&amp;nbsp;맥주소비량+위스키소비량+와인소비량 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;3.&amp;nbsp;파생변수&amp;nbsp;&amp;rarr;&amp;nbsp;도수:&amp;nbsp;(순수알콜/총술소비량)*100 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;4.&amp;nbsp;대륙별&amp;nbsp;그룹화하여&amp;nbsp;도수의&amp;nbsp;평균을&amp;nbsp;구한다&amp;nbsp;&amp;rarr;&amp;nbsp;가장&amp;nbsp;독한&amp;nbsp;술을&amp;nbsp;먹는&amp;nbsp;대륙은&amp;nbsp;어디인가?&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777365758999&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 1)
df.columns = ['국가', '맥주소비량', '위스키소비량', '와인소비량', '순수알콜', '대륙']

# 2)
df['총술소비량'] = df['맥주소비량']+df['위스키소비량']+df['와인소비량']

# 3)
df['도수'] = df['순수알콜']/df['총술소비량']*100

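# 4)
# 총술소비량이 0이면 도수 계산이 0으로 나누기가 되므로 해당 행을 먼저 확인
df[df['총술소비량'] == 0]

# 대륙별 도수 평균 (내림차순) &amp;rarr; 가장 위에 있는 대륙이 가장 독한 술을 먹는 대륙
df.groupby('대륙')['도수'].mean().sort_values(ascending=False)&lt;/code&gt;&lt;/pre&gt;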
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;우리나라의 총 술 소비량은 전세계 몇 위일까?&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777366067125&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 총 술 소비량을 기준으로 내림차순 정렬
df.sort_values('총술소비량', ascending=False, inplace=True)
# 인덱스를 초기화하고 기존의 인덱스를 제거
df.reset_index(drop=True, inplace=True)

# 순위 &amp;rarr; 위치 값에 +1
# 국가의 이름이 'South Korea' 인 데이터를 추출해서 인덱스를 확인

flag = df['국가'] == 'South Korea'
korea_index = df.loc[flag,].index[0]
korea_rank = korea_index + 1
korea_value = df.loc[korea_index, '총술소비량']&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그래프 생성&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777366090199&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;x = df.index
y = df['총술소비량']

flag = df['국가'] == 'South Korea'
korea_index = df.loc[flag,].index[0]
korea_rank = korea_index + 1
korea_value = df.loc[korea_index, '총술소비량']

plt.figure(figsize=(20, 8))
bar_graph = plt.bar(x, y, color='gray')

# 한국 위치의 막대를 선택해서 색상 변경
bar_graph[korea_index].set_color('red')

# 그래프에 주석 추가 (화살표로 주석이 가리키는 곳을 표시)
plt.annotate(
    f&quot;South Korea \n rank: {korea_rank} \n 총술소비량: {korea_value}&quot;,
    xy = (korea_index, korea_value),
    xytext= (korea_index+10, korea_value+150),
    arrowprops= {
        'facecolor' : 'pink',       # 화살표 안쪽 색상
        'edgecolor' : 'grey',       # 화살표 테두리 색상
        'headwidth' : 20,           # 화살표 머리 너비
        'headlength' : 20,          # 화살표 머리길이
        'width': 10,                # 화살표 꼬리의 너비
        'shrink': 0.1               # 화살표와 xy, xytext 간의 간격 조정
    }
)

# annotate 첫번째 인자: 주석이 들어갈 텍스트 지정
# 두번째 인자: 주석이 어느 부분을 설명하는가? (좌표)
# 세번째 인자: 주석 문구의 위치
# 네번째 인자: 주석과 해당 데이터를 화살표로 연결

plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;818&quot; data-origin-height=&quot;342&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ctUxa9/dJMcaaLSXKH/9UgUaF13qBBVoQDkSZgxJ0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ctUxa9/dJMcaaLSXKH/9UgUaF13qBBVoQDkSZgxJ0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ctUxa9/dJMcaaLSXKH/9UgUaF13qBBVoQDkSZgxJ0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FctUxa9%2FdJMcaaLSXKH%2F9UgUaF13qBBVoQDkSZgxJ0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;686&quot; height=&quot;287&quot; data-origin-width=&quot;818&quot; data-origin-height=&quot;342&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h2 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size26&quot;&gt;&lt;span style=&quot;color: #fe6b00; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;4월 29일&lt;/b&gt;&lt;/span&gt;&lt;/h2&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의 소감&lt;/b&gt;:&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;고수님들이 하는 짱멋진 시각화 - 엄청 많은 레이어도 깔끔하게 처리해서 한눈에 확 들어오게 함&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;내가 하는 시각화 - 색깔놀이 히히히&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;...센스가 하늘에서 내려올 수 있다면 얼마나 좋을까 &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  offline 데이터 활용 시각화&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;라이브러리 로드&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777435314199&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import platform
import matplotlib.pyplot as plt&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;데이터 로드, 확인&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777435416775&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;offline = pd.read_csv('../csv/card/오프라인.csv', encoding='cp949')
offline&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;580&quot; data-origin-height=&quot;380&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bJLrR9/dJMcadaHCl0/EA1MkYU49RYCZdh2tfU981/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bJLrR9/dJMcadaHCl0/EA1MkYU49RYCZdh2tfU981/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bJLrR9/dJMcadaHCl0/EA1MkYU49RYCZdh2tfU981/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbJLrR9%2FdJMcadaHCl0%2FEA1MkYU49RYCZdh2tfU981%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;408&quot; height=&quot;267&quot; data-origin-width=&quot;580&quot; data-origin-height=&quot;380&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;데이터 전처리&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;컬럼명 변경&lt;br /&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777435806601&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 항목정의서 엑셀 파일을 로드

col_info = pd.read_excel('../csv/card/항목정의서.xlsx')
col_info&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;602&quot; data-origin-height=&quot;203&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/U8K3c/dJMcadogUT7/E3DfOqGOpKZBKiFpVgVWK1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/U8K3c/dJMcadogUT7/E3DfOqGOpKZBKiFpVgVWK1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/U8K3c/dJMcadogUT7/E3DfOqGOpKZBKiFpVgVWK1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FU8K3c%2FdJMcadogUT7%2FE3DfOqGOpKZBKiFpVgVWK1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;391&quot; height=&quot;132&quot; data-origin-width=&quot;602&quot; data-origin-height=&quot;203&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1777435900282&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;col_info['데이터 컬럼'].to_list()
# ['기준년월', '업종 구분', '성별', '연령대', '이용 건수', '이용 금액']

offline.columns = col_info['데이터 컬럼'].to_list()
offline&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;462&quot; data-origin-height=&quot;381&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bG9OK3/dJMcai39GIB/1fJ2i345kKvuit1F83GdU1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bG9OK3/dJMcai39GIB/1fJ2i345kKvuit1F83GdU1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bG9OK3/dJMcai39GIB/1fJ2i345kKvuit1F83GdU1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbG9OK3%2FdJMcai39GIB%2F1fJ2i345kKvuit1F83GdU1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;310&quot; height=&quot;256&quot; data-origin-width=&quot;462&quot; data-origin-height=&quot;381&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;결측치 확인&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777436527954&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;offline.info()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;322&quot; data-origin-height=&quot;262&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/yzxTQ/dJMcabcU5cu/XIjs2WgXC3qsbVHOl2Dsg1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/yzxTQ/dJMcabcU5cu/XIjs2WgXC3qsbVHOl2Dsg1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/yzxTQ/dJMcabcU5cu/XIjs2WgXC3qsbVHOl2Dsg1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FyzxTQ%2FdJMcabcU5cu%2FXIjs2WgXC3qsbVHOl2Dsg1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;160&quot; height=&quot;130&quot; data-origin-width=&quot;322&quot; data-origin-height=&quot;262&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1777436646882&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 항목정의서를 기준으로 성별에서 '-'가 결측치이다.

(offline['성별']=='-').sum()		# np.int64(0)

offline['성별'].value_counts()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;225&quot; data-origin-height=&quot;90&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/oEeDh/dJMcadogWQ3/C0bSnZPiIgHni23AHUtvS1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/oEeDh/dJMcadogWQ3/C0bSnZPiIgHni23AHUtvS1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/oEeDh/dJMcadogWQ3/C0bSnZPiIgHni23AHUtvS1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FoEeDh%2FdJMcadogWQ3%2FC0bSnZPiIgHni23AHUtvS1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;178&quot; height=&quot;71&quot; data-origin-width=&quot;225&quot; data-origin-height=&quot;90&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;결측치가 없는 것으로 파악되었다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;컬럼 타입 변경&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777437634826&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from datetime import datetime
offline['기준년월'] = pd.to_datetime(offline['기준년월'], format='%Y%m')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그래프의 한글 깨짐 방지&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777439733831&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;if platform.system() == 'Windows':
    plt.rc('font', family = 'Malgun Gothic')
else:
    plt.rc('font', family = 'AppleGothic')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;데이터 시각화&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;월별, 성별을 기준으로 이용 금액의 평균을 그래프로 표시&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777441293590&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;pd.pivot_table(
    offline,
    index = '기준월',
    columns = '성별',
    values = '이용 금액',
    aggfunc = 'mean'
).plot(kind='bar', color=(&quot;#ffa9a9&quot;, &quot;#b0bfff&quot;))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리8.png&quot; data-origin-width=&quot;544&quot; data-origin-height=&quot;447&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/eH6cVc/dJMcagL7MZW/K3NzQnZBw1kL8pokneVVFk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/eH6cVc/dJMcagL7MZW/K3NzQnZBw1kL8pokneVVFk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/eH6cVc/dJMcagL7MZW/K3NzQnZBw1kL8pokneVVFk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FeH6cVc%2FdJMcagL7MZW%2FK3NzQnZBw1kL8pokneVVFk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;261&quot; height=&quot;214&quot; data-filename=&quot;티스토리8.png&quot; data-origin-width=&quot;544&quot; data-origin-height=&quot;447&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;기준년월별 업종별로 이용 금액의 합계를 그래프로 표시&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777441432191&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;pd.pivot_table(
    offline,
    index = '기준년월',
    columns = '업종 구분',
    values = '이용 금액',
    aggfunc = 'sum'
).plot(color=(&quot;#FFA4A4&quot;, &quot;#A1BAFF&quot;, &quot;#78E26A&quot;, &quot;#FFEC95&quot;))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리9.png&quot; data-origin-width=&quot;533&quot; data-origin-height=&quot;460&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bjKxh9/dJMcaib6ESy/n9Ztr0jAWpInhuQuxNc1oK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bjKxh9/dJMcaib6ESy/n9Ztr0jAWpInhuQuxNc1oK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bjKxh9/dJMcaib6ESy/n9Ztr0jAWpInhuQuxNc1oK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbjKxh9%2FdJMcaib6ESy%2Fn9Ztr0jAWpInhuQuxNc1oK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;355&quot; height=&quot;306&quot; data-filename=&quot;티스토리9.png&quot; data-origin-width=&quot;533&quot; data-origin-height=&quot;460&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;여러 개의 데이터를 한 캔버스 안에서 따로따로 그릴 때&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777442499598&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# subplots() &amp;rarr; 영역을 나눠주는 함수 &amp;rarr; 영역을 나눈 캔버스를 생성
    # &amp;rarr; 앞에서 생성한 캔버스의 크기는 초기화하고 새로 생성
    # 2개의 데이터를 되돌려준다. // 1. 캔버스, 2. 캔버스 영역


fig, axes = plt.subplots(2, 2, figsize=(20,8))
axes[0][0].plot([1,2,3,4], color=&quot;#FFB2B2&quot;)
axes[0][1].plot([1,2,3,4], color=&quot;#C0CBFF&quot;)
axes[1][0].plot([1,2,3,4], color=&quot;#97D48F&quot;)
axes[1][1].plot([1,2,3,4], color=&quot;#FFE89E&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리10.png&quot; data-origin-width=&quot;1598&quot; data-origin-height=&quot;658&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bQIaXY/dJMcahK1upk/NWDeUOJOSMli5knU6Q9zkk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bQIaXY/dJMcahK1upk/NWDeUOJOSMli5knU6Q9zkk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bQIaXY/dJMcahK1upk/NWDeUOJOSMli5knU6Q9zkk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbQIaXY%2FdJMcahK1upk%2FNWDeUOJOSMli5knU6Q9zkk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1598&quot; height=&quot;658&quot; data-filename=&quot;티스토리10.png&quot; data-origin-width=&quot;1598&quot; data-origin-height=&quot;658&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;한 화면에 연령대 별로 그래프를 따로 생성&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777443452282&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;group_data2 = pd.pivot_table(
    offline,
    index = '기준년월',
    columns = '연령대',
    values = '이용 금액',
    aggfunc = 'sum'
)

group_data2.plot(figsize=(10,10))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리11.png&quot; data-origin-width=&quot;823&quot; data-origin-height=&quot;860&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/YSEvS/dJMcajorCGs/qvKlaYTwUnU5hzW0gP1ov1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/YSEvS/dJMcajorCGs/qvKlaYTwUnU5hzW0gP1ov1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/YSEvS/dJMcajorCGs/qvKlaYTwUnU5hzW0gP1ov1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FYSEvS%2FdJMcajorCGs%2FqvKlaYTwUnU5hzW0gP1ov1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;354&quot; height=&quot;370&quot; data-filename=&quot;티스토리11.png&quot; data-origin-width=&quot;823&quot; data-origin-height=&quot;860&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;성별, 업종별 총 이용금액의 합계를 라인 그래프로 표시&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777446034109&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;group_data3 = pd.pivot_table(
    offline,
    index = '업종 구분',
    columns = '성별',
    values = '이용 금액',
    aggfunc = 'sum'
)

group_data3 = offline.groupby(['기준년월', '성별', '업종 구분'])['이용 금액'].sum()
group_data3 = group_data3.unstack().unstack()
group_data3.iloc[:, 0]

plt.figure(figsize=(16, 20))

for i in range(len(group_data3.columns)):
    plt.subplot(4, 2, i+1)
    plt.plot(group_data3.iloc[:, i])
    plt.title(group_data3.columns[i])

plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리12.png&quot; data-origin-width=&quot;1296&quot; data-origin-height=&quot;1606&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bgLLSs/dJMcacbRBqc/eSPHRMRt4B5tpUkTNaHSC1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bgLLSs/dJMcacbRBqc/eSPHRMRt4B5tpUkTNaHSC1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bgLLSs/dJMcacbRBqc/eSPHRMRt4B5tpUkTNaHSC1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbgLLSs%2FdJMcacbRBqc%2FeSPHRMRt4B5tpUkTNaHSC1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;401&quot; height=&quot;497&quot; data-filename=&quot;티스토리12.png&quot; data-origin-width=&quot;1296&quot; data-origin-height=&quot;1606&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  offline+online 데이터 활용 시각화&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;데이터 로드&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777604575749&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;offline = pd.read_csv('../csv/card/오프라인.csv', encoding='cp949')
online = pd.read_csv('../csv/card/온라인.csv', encoding='cp949')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;데이터 프레임 컬럼명 변경&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777605427726&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;offline.columns = ['기준년월', '업종', '성별', '연령대', '이용건수', '이용금액']
online.columns = ['기준년월', '성별', '연령대', '이용건수', '이용금액']&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;online dataset에는 업종 column이 없으므로, 두 데이터를 결합하면 online 행의 업종 값은 결측치(NaN)가 된다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;데이터 결합&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777605555838&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df = pd.concat( [offline, online], axis = 0, ignore_index = True )

# df 업종 column의 결측치 대체
df['업종'] = df['업종'].fillna('온라인')

# 기준년월을 시계열(datetime)로 변경
df['기준년월'] = pd.to_datetime(df['기준년월'], format='%Y%m')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;모든 업종별로 이용건수 차이를 그래프로 확인&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777606174154&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;pd.pivot_table(
    df,
    index = '기준년월',
    columns = '업종',
    values = '이용건수',
    aggfunc = 'sum'
).plot(figsize=(16,10), color=(&quot;#FFB9B9&quot;, &quot;#9ECBFF&quot;, &quot;#96E4A3&quot;, &quot;#FFE59C&quot;, &quot;#CC9EDA&quot;))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리13.png&quot; data-origin-width=&quot;1288&quot; data-origin-height=&quot;860&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cmDaWW/dJMcacJKXbK/FNpy18GDlK8wLytrJBY0j0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cmDaWW/dJMcacJKXbK/FNpy18GDlK8wLytrJBY0j0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cmDaWW/dJMcacJKXbK/FNpy18GDlK8wLytrJBY0j0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcmDaWW%2FdJMcacJKXbK%2FFNpy18GDlK8wLytrJBY0j0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;445&quot; height=&quot;297&quot; data-filename=&quot;티스토리13.png&quot; data-origin-width=&quot;1288&quot; data-origin-height=&quot;860&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  AAPL 데이터 활용&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;애플 주식 데이터&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;목표: 이동 평균선 도출&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1777608184431&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
import matplotlib.pyplot as plt
import platform

if platform.system() == 'Windows':
    plt.rc('font', family = 'Malgun Gothic')

df = pd.read_csv('../csv/AAPL.csv')

# 필요한 column만 불러오고 싶을 때는
df2 = pd.read_csv('../csv/AAPL.csv', usecols=['Date', 'Adj Close'])&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1777608337331&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df2.info()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;412&quot; data-origin-height=&quot;226&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bCGwHx/dJMcahj0rRN/LcvNXFlz9AzX9lIjW7IFm0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bCGwHx/dJMcahj0rRN/LcvNXFlz9AzX9lIjW7IFm0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bCGwHx/dJMcahj0rRN/LcvNXFlz9AzX9lIjW7IFm0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbCGwHx%2FdJMcahj0rRN%2FLcvNXFlz9AzX9lIjW7IFm0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;230&quot; height=&quot;126&quot; data-origin-width=&quot;412&quot; data-origin-height=&quot;226&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;결측치 처리&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;결측치가 하나 뿐이라 결측치가 포함된 행은 drop&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;Date column은 datetime type으로 변경&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777608417848&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df2.dropna(inplace=True)

df2['Date'] = pd.to_datetime(df2['Date'])
df2.info()

# Date column을 index로 설정
df2.set_index('Date', inplace=True)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;pre id=&quot;code_1777608952400&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df2.iloc[0:20, 0].mean()

# 이동평균값 컬럼을 먼저 생성 &amp;rarr; 데이터는 0.0을 대입
	# (평균을 구할 것이기에 float 타입으로 만들기 위함)
df2['이동평균값'] = 0.0

for i in range(len(df2)-19):
    df2.iloc[i+19, 1] = df2.iloc[i:i+20, 0].mean()
   
# 다른 방법으로 인덱스끼리 묶어서 연산을 해주는 함수가 존재: rolling(n)
df2['center2'] = df2['Adj Close'].rolling(20).mean().fillna(0)

df2[['Adj Close', 'center2']].tail(100).plot(figsize=(30, 10))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리14.png&quot; data-origin-width=&quot;2377&quot; data-origin-height=&quot;765&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/YHufU/dJMcadaJAp6/JW3Ch1yKpRAR9pufA9E4ck/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/YHufU/dJMcadaJAp6/JW3Ch1yKpRAR9pufA9E4ck/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/YHufU/dJMcadaJAp6/JW3Ch1yKpRAR9pufA9E4ck/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FYHufU%2FdJMcadaJAp6%2FJW3Ch1yKpRAR9pufA9E4ck%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2377&quot; height=&quot;765&quot; data-filename=&quot;티스토리14.png&quot; data-origin-width=&quot;2377&quot; data-origin-height=&quot;765&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그래프 꾸미기&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777609009394&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.figure(figsize=(30, 10))
plt.plot(df2.tail(100)['Adj Close'], color='#000000')
plt.plot(df2.tail(100)['center2'], '--', color=&quot;#cf2424&quot;)

plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리15.png&quot; data-origin-width=&quot;2400&quot; data-origin-height=&quot;812&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/b4SZOz/dJMcaf0Jagj/YNocjlXdznRdvnZpAO8LS0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/b4SZOz/dJMcaf0Jagj/YNocjlXdznRdvnZpAO8LS0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/b4SZOz/dJMcaf0Jagj/YNocjlXdznRdvnZpAO8LS0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb4SZOz%2FdJMcaf0Jagj%2FYNocjlXdznRdvnZpAO8LS0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2400&quot; height=&quot;812&quot; data-filename=&quot;티스토리15.png&quot; data-origin-width=&quot;2400&quot; data-origin-height=&quot;812&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1777609316074&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df2['ub'] = df2['center2'] + (2*df2['Adj Close'].rolling(20).std())
df2['lb'] = df2['center2'] - (2*df2['Adj Close'].rolling(20).std())

# 최근 100일치 데이터에서 상단밴드, 하단밴드, 이평선, 종가 그래프
df2.drop(['이동평균값'], axis=1).tail(100).plot()&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리16.png&quot; data-origin-width=&quot;548&quot; data-origin-height=&quot;414&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bq7nSI/dJMcah5mcus/RsvD5uGnbTjPlU79CCsBGK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bq7nSI/dJMcah5mcus/RsvD5uGnbTjPlU79CCsBGK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bq7nSI/dJMcah5mcus/RsvD5uGnbTjPlU79CCsBGK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fbq7nSI%2FdJMcah5mcus%2FRsvD5uGnbTjPlU79CCsBGK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;303&quot; height=&quot;229&quot; data-filename=&quot;티스토리16.png&quot; data-origin-width=&quot;548&quot; data-origin-height=&quot;414&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이것도 좋지만 조금 더 깔끔하게 보고 싶어져서 따로 찾아본 버전&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777609376838&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;plt.figure(figsize=(30,10))
plt.plot(df2['ub'].tail(200), color=&quot;#52865F&quot;, label='ub')
plt.plot(df2['lb'].tail(200), color='#52865F', label='lb')
plt.plot(df2['center2'].tail(200), color=&quot;#FF00F2&quot;, label='이평선')
plt.plot(df2['Adj Close'].tail(200), marker = 'o', markersize=5, color=&quot;#C25555&quot;, label='종가')
plt.fill_between(df2.tail(200).index, df2['ub'].tail(200), df2['lb'].tail(200), color = 'green', alpha = 0.05)
plt.legend()
plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;plt.fill_between을 사용하면 색깔이 채워져서 더욱 band 같아보일 것 같았다!&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리17.png&quot; data-origin-width=&quot;2377&quot; data-origin-height=&quot;812&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bR1d4B/dJMcaiXqOR7/y68b05RRnsm5lZEoIXD5J0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bR1d4B/dJMcaiXqOR7/y68b05RRnsm5lZEoIXD5J0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bR1d4B/dJMcaiXqOR7/y68b05RRnsm5lZEoIXD5J0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbR1d4B%2FdJMcaiXqOR7%2Fy68b05RRnsm5lZEoIXD5J0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2377&quot; height=&quot;812&quot; data-filename=&quot;티스토리17.png&quot; data-origin-width=&quot;2377&quot; data-origin-height=&quot;812&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h2 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size26&quot;&gt;&lt;span style=&quot;color: #fe6b00; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;4월 30일  &amp;zwj; &lt;/b&gt;&lt;/span&gt;&lt;/h2&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의 소감&lt;/b&gt;: tableau는 코드가 불필요하다. (엑셀 기초 정도의 실력만 있으면 될 듯)&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;혼자서 tableau를 쓰려고 했을 때 가장 막막한 것은 시작이었다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;새로운 툴에 대한 마음적 장벽도 있지만, 어떤 데이터를 가지고 시작해야할지를 몰랐다고 해야할까...?&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;matplotlib의 디자인을 보다가 seaborn을 보고 놀랐고, seaborn을 보다가 tableau를 보니 더욱 놀랐다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;특히 지리 데이터를 다루는 것을 꼭 한번 시도해보고 싶었는데 tableau에서 해당 부분 시각화가 가능하다는 생각에 기뻤다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;GA는 웹을 관리하는 입장, 예를 들면 소형 쇼핑몰 등에서 아주 유용하게 사용할 수 있는 툴이었다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;아직 html과 많이 친하지 않아서... 또 tableau를 보다가 GA를 보려니까 훨씬 복잡한 느낌이긴 했다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;해당 부분 진도를 짧게 나간지라 아직 시스템과 친해질 시간이 많이 필요해보인다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;개인적으로는 looker와 GA를 어떻게 같이 사용하는지가 궁금했는데, 시간이 부족한 관계로 이 부분까지는 진도를 나가지 못했다. 추후에 수행할 기회가 왔으면 좋겠다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;Tableau가 워낙 직관적으로 구성이 잘 돼있어서 크게 작성할 것이 없다...! 그래서 신기했던 것 위주로 짤막하게 작성해보려 한다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;서울시의 행정구별 평균 할인율&lt;/span&gt;&lt;/blockquote&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1909&quot; data-origin-height=&quot;973&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bW77zF/dJMcab47ri2/CECKmBOTn6kbIWvf6mitGk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bW77zF/dJMcab47ri2/CECKmBOTn6kbIWvf6mitGk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bW77zF/dJMcab47ri2/CECKmBOTn6kbIWvf6mitGk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbW77zF%2FdJMcab47ri2%2FCECKmBOTn6kbIWvf6mitGk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1909&quot; height=&quot;973&quot; data-origin-width=&quot;1909&quot; data-origin-height=&quot;973&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;처음 실행했을 때는 중구와 강서구의 데이터가 나오지 않았다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;다른 지역(ex. 부산광역시)에 중구, 강서구가 있었기 때문에 정확한 지역을 표시할 수 없다고 나온 것이었다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이 때 세부정보로 Address SD(시도)를 추가하니 문제가 해결되었다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;폭포수 차트&lt;/span&gt;&lt;/blockquote&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1337&quot; data-origin-height=&quot;807&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/MOlJp/dJMcaad5Pcv/dIF2nE7kC8DCjKvLWPhptk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/MOlJp/dJMcaad5Pcv/dIF2nE7kC8DCjKvLWPhptk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/MOlJp/dJMcaad5Pcv/dIF2nE7kC8DCjKvLWPhptk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FMOlJp%2FdJMcaad5Pcv%2FdIF2nE7kC8DCjKvLWPhptk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1337&quot; height=&quot;807&quot; data-origin-width=&quot;1337&quot; data-origin-height=&quot;807&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;2021년의 경우 적더라도 수익이 꾸준히 난 구조이기에 이게 뭐? 싶을 수도 있는 결과가 도출되었다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1580&quot; data-origin-height=&quot;806&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/KNy1W/dJMcagL9FSy/9dCvNkhohmQrYoFSfISBtK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/KNy1W/dJMcagL9FSy/9dCvNkhohmQrYoFSfISBtK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/KNy1W/dJMcagL9FSy/9dCvNkhohmQrYoFSfISBtK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FKNy1W%2FdJMcagL9FSy%2F9dCvNkhohmQrYoFSfISBtK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1580&quot; height=&quot;806&quot; data-origin-width=&quot;1580&quot; data-origin-height=&quot;806&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그래서 나름 다이나믹했던 2019년부터 2020년까지의 데이터를 보았다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;빨간색에 가까울수록 이익이 마이너스, 파란색에 가까울수록 이익이 플러스이다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;각 막대가 y=0에서 시작하는 것이 아닌, 전월의 마지막 시점에서 시작한다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;i&gt;&lt;s&gt;말해놓고 보니 주식 차트였네&lt;/s&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;color: #fe6b00; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;4주차 소감&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;생각보다 시각화를 많이 하지 않은 느낌이라 아쉽긴 하지만...&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;새로운 것을 많이 배워갈 수 있는 시간이었다!&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;연휴 동안 복습 게을리하지 말고, 지금까지 배운 것들을 빅분기 실기랑 내용 대조해가면서 공부도 조금씩 하자!&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;다음 주차는 머신러닝이다. 빅분기에서 써먹을 일 많을테니 열심히 하자! &lt;span style=&quot;background-color: #ffffff; color: #000000; text-align: left;&quot;&gt;- ̗̀( ˶'ᵕ'˶) ̖́-&lt;/span&gt; &lt;/span&gt;&lt;/p&gt;</description>
      <category>멀티캠퍼스부트캠프</category>
      <category>데이터 분석가 부트캠프</category>
      <category>멀티캠퍼스부트캠프</category>
      <category>부트캠프</category>
      <author>가라어퍼</author>
      <guid isPermaLink="true">https://bbgw-oshoulder.tistory.com/4</guid>
      <comments>https://bbgw-oshoulder.tistory.com/4#entry4comment</comments>
      <pubDate>Fri, 1 May 2026 17:27:53 +0900</pubDate>
    </item>
    <item>
      <title>3주차 Note: 데이터 수집</title>
      <link>https://bbgw-oshoulder.tistory.com/3</link>
      <description>&lt;p&gt;&lt;figure class=&quot;imageblock style1&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;KakaoTalk_20260424_212754252.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bmoQGM/dJMcadoePmE/6ykbtwKdZIRfhclNCKk50K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bmoQGM/dJMcadoePmE/6ykbtwKdZIRfhclNCKk50K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bmoQGM/dJMcadoePmE/6ykbtwKdZIRfhclNCKk50K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbmoQGM%2FdJMcadoePmE%2F6ykbtwKdZIRfhclNCKk50K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;495&quot; height=&quot;264&quot; data-filename=&quot;KakaoTalk_20260424_212754252.png&quot; data-origin-width=&quot;1920&quot; data-origin-height=&quot;1024&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt; ️ 3주차 : 4월 20일 - 4월 24일&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;멀티캠퍼스&lt;/span&gt; 부트캠프&amp;nbsp;3주차&amp;nbsp;요약✍&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[&amp;nbsp;4/20&amp;nbsp;]&amp;nbsp;&lt;b&gt;데이터&amp;nbsp;수집&amp;nbsp;(AI&amp;nbsp;활용)&lt;/b&gt;:&amp;nbsp;python과&amp;nbsp;db&amp;nbsp;연결(+모듈&amp;nbsp;생성),&amp;nbsp;openAPI&amp;nbsp;data(json/xml)를&amp;nbsp;DataFrame으로&amp;nbsp;변환 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[&amp;nbsp;4/21&amp;nbsp;]&amp;nbsp;&lt;b&gt;데이터&amp;nbsp;수집&amp;nbsp;(AI&amp;nbsp;활용)&lt;/b&gt;:&amp;nbsp;HTML,&amp;nbsp;크롤링 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[&amp;nbsp;4/22&amp;nbsp;]&amp;nbsp;&lt;b&gt;데이터&amp;nbsp;수집&amp;nbsp;(AI&amp;nbsp;활용)&lt;/b&gt;:&amp;nbsp;Selenium &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[&amp;nbsp;4/23&amp;nbsp;]&amp;nbsp;&lt;b&gt;데이터&amp;nbsp;수집&amp;nbsp;(AI&amp;nbsp;활용)&lt;/b&gt;:&amp;nbsp;SQL과&amp;nbsp;Python의&amp;nbsp;문법&amp;nbsp;비교 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;[&amp;nbsp;4/24&amp;nbsp;]&amp;nbsp;&lt;b&gt;Github&amp;nbsp;특강&lt;/b&gt;:&amp;nbsp;본문에서는&amp;nbsp;생략&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;4월&amp;nbsp;20일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의 소감&lt;/b&gt;:  오류의 늪 시작... &lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;초심자의 입장에서 주석은 정말 중요하지만 긴 주석과 긴 코드를 보고 있으니 머리가 더욱 어지럽다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;주석과 코드를 같이 쓸 때의 가독성을 확보할 수 있는 방법을 강구해보아야겠다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt; &amp;nbsp;python과&amp;nbsp;db&amp;nbsp;연결&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;dotenv&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;python과 database를 연결할 때 주소, 포트 등 서버의 정보를 입력하게 되는데,&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이를 그냥 온라인에 업로드하면 보안상 취약점이 된다. &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그러한&amp;nbsp;부분을&amp;nbsp;숨기는&amp;nbsp;기능이&amp;nbsp;dotenv이다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777198429824&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;!pip install python-dotenv

# 라이브러리 load
from dotenv import load_dotenv
import pymysql
import os
import pandas as pd

# os &amp;rarr; python 환경에서 사용하는 변수들에 접근하기 위해 사용
# load_dotenv &amp;rarr; .env 파일의 내용을 환경 변수에 임시 등록&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;dotenv를 사용하기 위해 .env 파일을 생성하고, python에서&amp;nbsp;load_dotenv&amp;nbsp;실행&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777209468544&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;load_dotenv()		# True

# 등록된 환경변수에 접근
os.getenv('port')	# 포트번호가 '문자열'로 출력: 숫자 변환 필요&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;나의 경우 &lt;i&gt;&lt;span style=&quot;color: #ee2323;&quot;&gt;OperationalError: (2003, &quot;Can't connect to MySQL server on '`서버코드`' ([WinError 10061] 대상 컴퓨터에서 연결을 거부했으므로 연결하지 못했습니다)&quot;)&lt;/span&gt;&lt;/i&gt; 라는 에러가 출력되었는데, 서비스 &amp;rarr; MySQL80이 실행되고 있는지 확인해야한다. &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;백그라운드에서&amp;nbsp;꺼져있었기에&amp;nbsp;발생한&amp;nbsp;문제이고,&amp;nbsp;실행하고&amp;nbsp;난&amp;nbsp;뒤&amp;nbsp;정상적으로&amp;nbsp;작동하였다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;Cursor 생성&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777209654398&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;cursor = _db.cursor(pymysql.cursors.DictCursor)

# DictCursor는 select의 결과값을 Dict 형태로 받아온다.&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;Table 생성&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777209679194&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# DDL: 테이블 생성
# 기존의 테이블이 존재한다면 테이블 생성 X
# 없으면 생성

create_table = &quot;&quot;&quot;
    CREATE TABLE IF NOT EXISTS
    `user_info`
    (
        `id` VARCHAR(32) PRIMARY KEY,
        `password` VARCHAR(32) NOT NULL,
        `name` VARCHAR(32),
        `age` INT
    )
&quot;&quot;&quot;&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;질의&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777209714899&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# cursor를 통해서 sql 쿼리문을 실행할 때는 execute() 함수를 이용
cursor.execute(create_table)		# 0&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;데이터 대입&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777209740288&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;signup_query = &quot;&quot;&quot;
    INSERT INTO
    `user_info`
    VALUES (
        &quot;test&quot;, &quot;1234&quot;, &quot;kim&quot;, 30
    )
&quot;&quot;&quot;
cursor.execute(signup_query)	# 1&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;fetchall()&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777209863718&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 결과값을 불러오는 함수

cursor.fetchall()
# [{'id': 'test', 'password': '1234', 'name': 'kim', 'age': 30}]&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그런데 아직까지는 MySQL Workbench에서는 데이터가 등록되지 않은 상태이다. &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이는 변경 사항이 아직 확정(commit)되지 않은 상태라는 것을 의미한다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;commit&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777210046269&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# _db에서 확정 작업(commit()), 결과를 DB와 동기화
_db.commit()&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이후&amp;nbsp;Workbench를&amp;nbsp;확인해보면&amp;nbsp;데이터가&amp;nbsp;등록됨을&amp;nbsp;볼&amp;nbsp;수&amp;nbsp;있다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;여러 데이터 입력&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777210084816&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 들어오는 데이터가 바뀔 때마다 query문을 작성하는 건 불필요

signup_query = &quot;&quot;&quot;
    INSERT INTO `user_info`
    VALUES (%s, %s, %s, %s)
&quot;&quot;&quot;

# %s 는 들어오는 데이터들의 위치를 지정
input_id = input('아이디를 입력하시오')
input_pass = input('비밀번호를 입력하시오')
input_name = input('이름을 입력하시오')
input_age = input('나이를 입력하시오')

# execute(query, [datas]) 함수에 리스트의 각각의 원소들이 %s 위치에 순서대로 대입
cursor.execute(signup_query, [input_id, input_pass, input_name, input_age])		# 1

cursor.execute(user_list_query)		# 2

cursor.fetchall()
# [{'id': 'test', 'password': '1234', 'name': 'kim', 'age': 30},
# {'id': 'test2', 'password': '1111', 'name': 'lee', 'age': 20}]&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  DB 연동 class 선언, 생성&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;로직&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;생성자 함수&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;서버의 정보를 입력한다. (매개변수 필요)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;class가 생성이 될 때마다 다른 DB server에 정보를 담을 수 있다.&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;2개의 객체를 생성하여 다른 DB server와 연동&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;함수 2개 생성&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;1. query문 실행 함수 (매개변수: query문(필수), *datas)&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;서버와의 연결&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;cursor 생성&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;CUD (insert, update, delete)&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;query문 작성&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;execute() 함수를 이용하여 cursor에 질의를 보낸다.&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;query문의 시작이 select라면 fetchall() 함수를 사용하여 데이터를 return&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;2. DataBase에 변화를 주는 함수&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;DB server에서 commit()을 이용해서 데이터 확정&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;DB server와의 연결을 종료(close())&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;코드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;class 선언&lt;/b&gt;&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1777210489781&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;class MyDB:
    def __init__(self, host, port, user, password, db):
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.db = db

    # 데이터베이스에 변화를 주는 함수 
    def commit(self):
        try:
            self.db_server.commit()
            self.db_server.close()
            del self.db_server
        except:
            print( &quot;데이터베이스 서버와의 연결이 되어있지 않습니다. sql_query() 함수를 호출하여 서버와의 연결을 해주세요&quot; )
    
    def sql_query(self, query, *datas):
        try:
            self.db_server
            print('접속된 서버가 존재함')
        except:
            self.db_server = pymysql.connect(
                host = self.host, 
                port = self.port, 
                user = self.user, 
                password = self.password, 
                db = self.db
            )
        # cursor 생성 
        cursor = self.db_server.cursor(pymysql.cursors.DictCursor)

        try:
            cursor.execute(query, datas)
            if query.lstrip().lower().startswith('select'):
                result = cursor.fetchall()
            else:
                result = &quot;Query OK!&quot;
            return result
        except Exception as e:
            print('query문 execute중 에러')
            print(e)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;class 생성&lt;/b&gt;&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1777210558799&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;db1= MyDB(
    host = os.getenv('host'),
    port = int(os.getenv('port')),
    user = os.getenv('user'),
    password = os.getenv('pwd'),
    db = os.getenv('db_name')
)

db1.sql_query('a')
db1.commit()&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;i&gt;&lt;span style=&quot;color: #ee2323;&quot;&gt;생성자 함수에 콤마 넣지 말자, &lt;/span&gt;&lt;/i&gt;&lt;span style=&quot;color: #ee2323;&quot;&gt;&lt;i&gt;port에 int 제대로 쓰자&lt;/i&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #dddddd;&quot;&gt;&lt;s&gt;&lt;i&gt;다 했는데 안 되면 재시작해라 그게 최고다&lt;/i&gt;&lt;/s&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;data insert&lt;/b&gt;&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1777210659705&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;insert_query = &quot;&quot;&quot;
    INSERT INTO `user_info`
    VALUES (%s, %s, %s, %s)
&quot;&quot;&quot;
select_query = &quot;&quot;&quot;
    SELECT * FROM `user_info`
&quot;&quot;&quot;

data_list = ['test3', '0000', 'lee', 40]
db1.sql_query(insert_query, *data_list)

# 접속된 서버가 존재함
# 'Query OK!'&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;data 확인&lt;/b&gt;&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1777210698232&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;db1.sql_query(select_query)

# [{'id': 'test', 'password': '1234', 'name': 'kim', 'age': 30},
# {'id': 'test3', 'password': '0000', 'name': 'lee', 'age': 40}]&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;data delete&lt;/b&gt;&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1777210721863&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;delete_query = &quot;&quot;&quot;
    DELETE FROM `user_info`
&quot;&quot;&quot;

data_list = ['test']

db1.sql_query(delete_query)

# 접속된 서버가 존재함
# 'Query OK!'&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;i&gt;??: 다 했는데 안 되면 재시작해라 그게 최고다&lt;/i&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;color: #ee2323; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;i&gt;아니? 새로고침 안 좋은데? 초기화 안 좋은데?ㅠㅠ 초기화되면 로딩했던 거 다 사라지는데??&lt;/i&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;한참동안&amp;nbsp;고쳐지지&amp;nbsp;않는&amp;nbsp;에러가&amp;nbsp;떠서&amp;nbsp;강사님&amp;nbsp;코드도&amp;nbsp;복붙해보고...&amp;nbsp;프로그램을&amp;nbsp;새로&amp;nbsp;키기를&amp;nbsp;몇번이나&amp;nbsp;했고&amp;nbsp;라이브러리도&amp;nbsp;죄다&amp;nbsp;새로&amp;nbsp;import&amp;nbsp;해놓았는데 &lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;정말 바보도 아니고 라이브러리는 임포트했으면서 &lt;span style=&quot;background-color: #dddddd;&quot;&gt;&amp;nbsp;load_dotenv()&amp;nbsp;&lt;/span&gt; 를&amp;nbsp;실행하지&amp;nbsp;않고&amp;nbsp;계속&amp;nbsp;작업한&amp;nbsp;것이다... &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;그리고&amp;nbsp;콤마!!!!&amp;nbsp;자꾸&amp;nbsp;써야할&amp;nbsp;곳에&amp;nbsp;안&amp;nbsp;쓰고&amp;nbsp;안&amp;nbsp;써야할&amp;nbsp;곳에&amp;nbsp;쓴다. &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;언젠가&amp;nbsp;또&amp;nbsp;실수할&amp;nbsp;것이&amp;nbsp;뻔하므로&amp;nbsp;기록... &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;모듈화&lt;/b&gt;&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;py&amp;nbsp;파일에&amp;nbsp;import한&amp;nbsp;라이브러리들과&amp;nbsp;아까&amp;nbsp;만들어뒀던&amp;nbsp;클래스를&amp;nbsp;붙여넣어&amp;nbsp;모듈화&amp;nbsp;진행&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777210872421&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 파일명 db.py

import db
db3 = db.MyDB()
db3.sql_query(select_query)

# [{'id': 'test', 'password': '1234', 'name': 'kim', 'age': 30}]&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;잘&amp;nbsp;불러와지는&amp;nbsp;것을&amp;nbsp;확인할&amp;nbsp;수&amp;nbsp;있다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  Open API&lt;/b&gt; &lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;개인적으로&amp;nbsp;학교에서&amp;nbsp;알려준&amp;nbsp;바가&amp;nbsp;없어서&amp;nbsp;가장&amp;nbsp;궁금했던&amp;nbsp;부분이었던&amp;nbsp;Open&amp;nbsp;API&amp;nbsp;활용... &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;Key를&amp;nbsp;발행하는&amp;nbsp;것까지는&amp;nbsp;해본&amp;nbsp;적이&amp;nbsp;있는데&amp;nbsp;그&amp;nbsp;이후에&amp;nbsp;이&amp;nbsp;Key를&amp;nbsp;가지고&amp;nbsp;뭐&amp;nbsp;어떡하라는&amp;nbsp;건지&amp;nbsp;몰라서&amp;nbsp;마구&amp;nbsp;찾아보다&amp;nbsp;그냥&amp;nbsp;돌아선&amp;nbsp;적이&amp;nbsp;있다  &lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;data download&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;a title=&quot;재단법인 충북 테크노파크 데이터셋&quot; href=&quot;https://www.data.go.kr/data/15111004/openapi.do#/API%20%EB%AA%A9%EB%A1%9D/corp%2FgetEsgStatus&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://www.data.go.kr/data/15111004/openapi.do#/API%20%EB%AA%A9%EB%A1%9D/corp%2FgetEsgStatus&lt;/a&gt; 데이터를 받아와서 python에서 DataFrame으로 변형시켰다. &lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777211126208&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 라이브러리 설치/로드
!pip install requests
import requests

# 인증키 변수에 저장

service_key = '내가 발급받은 API key'
page_no = 1
num_of_rows = 100

url = 'https://apis.data.go.kr/B553069/esg/corp/getEsgStatus'

# 파라미터 지정

params = {
    'serviceKey': service_key,
    'pageNo' : page_no,
    'numOfRows': num_of_rows
}

# server에게 요청을 보낸다
# requests 안에 get() 함수를 이용. 대부분 get을 사용
    # get()
        # 첫번째 인자에는 주소
        # params 매개변수: 파라미터 값들을 dict 형태로 대입

res = requests.get(url, params=params)

res # &amp;lt;Response [200]&amp;gt;
type(res.content) # bytes&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;bytes를 dictionary type으로 변환&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777211184993&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import json
res_data = json.loads(res.content)
type(res_data) # dict

# data를 예쁘게 출력해서 보고 싶을 때
from pprint import pprint
pprint(res_data)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리1.png&quot; data-origin-width=&quot;882&quot; data-origin-height=&quot;765&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dPnP1G/dJMcad2OYz0/pmATxFzkj3YQ5MlHDoebq0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dPnP1G/dJMcad2OYz0/pmATxFzkj3YQ5MlHDoebq0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dPnP1G/dJMcad2OYz0/pmATxFzkj3YQ5MlHDoebq0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdPnP1G%2FdJMcad2OYz0%2FpmATxFzkj3YQ5MlHDoebq0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;374&quot; height=&quot;324&quot; data-filename=&quot;티스토리1.png&quot; data-origin-width=&quot;882&quot; data-origin-height=&quot;765&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;dictionary를 DataFrame으로 변환&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #9d9d9d; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;s&gt;&lt;i&gt;배운지 얼마나 됐다고 아는 내용 나오니 이리 기쁠 수가 없었다&lt;/i&gt;&lt;/s&gt;&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777211336805&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df = pd.DataFrame(res_data['response']['body']['items'])&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;xml,&amp;nbsp;json&amp;nbsp;파일을&amp;nbsp;DataFrame으로&amp;nbsp;변환&lt;/span&gt; &lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;또 다른 데이터셋을 download &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;a href=&quot;https://www.data.go.kr/data/15126832/openapi.do#/API%20%EB%AA%A9%EB%A1%9D/pscd&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://www.data.go.kr/data/15126832/openapi.do#/API%20%EB%AA%A9%EB%A1%9D/pscd&lt;/a&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;해당 데이터를 다운로드했다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;env 파일을 활용하여 데이터 load&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;env 파일에 다음과 같이 작성하여 저장&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777211460899&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;service_key = '내가 발급받은 API key'&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;xml 데이터 변환&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777211517055&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;url = 'https://apis.data.go.kr/B010003/kamcoRlctRlst/pscd'

params = {
    'serviceKey': os.getenv('service_key'),
    'pageNo' : 1,
    'numOfRows': 100,
    'resultType': 'xml'
}

res = requests.get(url, params=params)
res # &amp;lt;Response [200]&amp;gt;

# -----------------------------------------------------

!pip install xmltodict
# xml을 dict 형태로 변환하는 라이브러리
from xmltodict import parse

res_data_xml = parse(res.content)
pprint(res_data_xml['response']['body']['items']['item'])&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리2.png&quot; data-origin-width=&quot;717&quot; data-origin-height=&quot;730&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/blnils/dJMcabqtD2S/ElJlRWtw09NefLgkHLHUrk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/blnils/dJMcabqtD2S/ElJlRWtw09NefLgkHLHUrk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/blnils/dJMcabqtD2S/ElJlRWtw09NefLgkHLHUrk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fblnils%2FdJMcabqtD2S%2FElJlRWtw09NefLgkHLHUrk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;457&quot; height=&quot;730&quot; data-filename=&quot;티스토리2.png&quot; data-origin-width=&quot;717&quot; data-origin-height=&quot;730&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;pre id=&quot;code_1777211991785&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df3 = pd.DataFrame(res_data_xml['response']['body']['items']['item'])&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;json 데이터 변환&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777212074771&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;params2 = {
    'serviceKey': os.getenv('service_key'),
    'pageNo' : 1,
    'numOfRows': 100,
    'resultType': 'json'
}

res2 = requests.get(url, params=params2)
# url은 xml 데이터 변환 코드에 작성되어 있음
res2 # &amp;lt;Response [200]&amp;gt;
res_data_json = json.loads(res2.content)
pprint(res_data_json['body']['items']['item'])&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리3.png&quot; data-origin-width=&quot;903&quot; data-origin-height=&quot;843&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bu3Sxw/dJMcaakL1pJ/W4QuWMOkc0Ff24A8yHnQ7K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bu3Sxw/dJMcaakL1pJ/W4QuWMOkc0Ff24A8yHnQ7K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bu3Sxw/dJMcaakL1pJ/W4QuWMOkc0Ff24A8yHnQ7K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fbu3Sxw%2FdJMcaakL1pJ%2FW4QuWMOkc0Ff24A8yHnQ7K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;443&quot; height=&quot;843&quot; data-filename=&quot;티스토리3.png&quot; data-origin-width=&quot;903&quot; data-origin-height=&quot;843&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;pre id=&quot;code_1777212134316&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df2 = pd.DataFrame(res_data_json['body']['items']['item'])&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이렇게 하여 open API xml, json 형태의 데이터를 불러와 DataFrame 형식으로 변환할 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;코드가 길고 불러올 라이브러리들도 많으니 코드의 가독성이 이래서 필요하구나 싶다. &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #dddddd; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;i&gt;&lt;s&gt;앞으로 더 복잡해지겠지&lt;/s&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이제 점점 더 어려워질 테니 마음 단단히 먹고 가자 &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #fe6b00;&quot;&gt;4월 21일 &amp;zwj;  &lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의&amp;nbsp;소감&lt;/b&gt;:&amp;nbsp;아래&amp;nbsp;이미지로&amp;nbsp;대체 &lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imagegridblock&quot;&gt;
  &lt;div class=&quot;image-container&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/IhwJq/dJMcacv9w2E/XfK6u6LjPjPNS0WHKy5iT1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/IhwJq/dJMcacv9w2E/XfK6u6LjPjPNS0WHKy5iT1/img.png&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;720&quot; data-is-animation=&quot;false&quot; data-filename=&quot;티스토리4.png&quot; style=&quot;width: 63.2558%; margin-right: 10px;&quot; data-widthpercent=&quot;64&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/IhwJq/dJMcacv9w2E/XfK6u6LjPjPNS0WHKy5iT1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FIhwJq%2FdJMcacv9w2E%2FXfK6u6LjPjPNS0WHKy5iT1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;720&quot;/&gt;&lt;/span&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/9GMwz/dJMcacv9w2B/szYf6n7HiSudZXG0AlGbn0/img.jpg&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/9GMwz/dJMcacv9w2B/szYf6n7HiSudZXG0AlGbn0/img.jpg&quot; data-is-animation=&quot;false&quot; data-origin-width=&quot;236&quot; data-origin-height=&quot;236&quot; data-filename=&quot;티스토리5.jpg&quot; style=&quot;width: 35.5814%;&quot; data-widthpercent=&quot;36&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/9GMwz/dJMcacv9w2B/szYf6n7HiSudZXG0AlGbn0/img.jpg&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F9GMwz%2FdJMcacv9w2B%2FszYf6n7HiSudZXG0AlGbn0%2Fimg.jpg&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;236&quot; 
height=&quot;236&quot;/&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;듀얼 모니터 없이 노트북만으로 HTML, 줌, VS code 병행하려니 죽을 맛이다... &lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;결국 어제 모니터를 구매했다...ㅎㅎ&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #dddddd; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;s&gt;&lt;i&gt;왜 느는 게 실력이 아니라 장비욕심이냐&lt;/i&gt;&lt;/s&gt;&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  HTML&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;각종 태그 모음&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리6.png&quot; data-origin-width=&quot;1766&quot; data-origin-height=&quot;748&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/WdhBD/dJMcagrKPE5/QKY9ZVbXeWiw1r71LrbiVK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/WdhBD/dJMcagrKPE5/QKY9ZVbXeWiw1r71LrbiVK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/WdhBD/dJMcagrKPE5/QKY9ZVbXeWiw1r71LrbiVK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FWdhBD%2FdJMcagrKPE5%2FQKY9ZVbXeWiw1r71LrbiVK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1766&quot; height=&quot;748&quot; data-filename=&quot;티스토리6.png&quot; data-origin-width=&quot;1766&quot; data-origin-height=&quot;748&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리7.png&quot; data-origin-width=&quot;1857&quot; data-origin-height=&quot;531&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/rqYIK/dJMcahKZhFO/kf9N9ORY630oPzSSiXTTX0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/rqYIK/dJMcahKZhFO/kf9N9ORY630oPzSSiXTTX0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/rqYIK/dJMcahKZhFO/kf9N9ORY630oPzSSiXTTX0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FrqYIK%2FdJMcahKZhFO%2Fkf9N9ORY630oPzSSiXTTX0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1857&quot; height=&quot;531&quot; data-filename=&quot;티스토리7.png&quot; data-origin-width=&quot;1857&quot; data-origin-height=&quot;531&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;  크롤링 &lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;find()&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;html&amp;nbsp;문서&amp;nbsp;안에서&amp;nbsp;특정&amp;nbsp;태그의&amp;nbsp;첫번째&amp;nbsp;정보를&amp;nbsp;출력 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; background-color: #dddddd;&quot;&gt;&amp;nbsp;find(속성명=속성값)&amp;nbsp;&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;결과값의 type은 TAG&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777214778647&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 라이브러리 설치/로드
!pip install bs4 selenium
import requests
from bs4 import BeautifulSoup as bs

res = requests.get(&quot;https://www.naver.com&quot;)
res.content     # type: bytes
res.text        # type: str
html_text = res.text

# 문자열에서 특정 문자의 위치를 찾는 함수: find()
html_text.find(&quot;네이버&quot;) # 378&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;parser&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;문자&amp;nbsp;형태로&amp;nbsp;이루어진&amp;nbsp;html&amp;nbsp;문서를&amp;nbsp;BeautifulSoup&amp;nbsp;class를&amp;nbsp;이용해서&amp;nbsp;객체&amp;nbsp;변환 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;문자&amp;nbsp;데이터에서는&amp;nbsp;특정&amp;nbsp;영역의&amp;nbsp;content를&amp;nbsp;추출하기&amp;nbsp;어렵기에,&amp;nbsp;BeautifulSoup에&amp;nbsp;내장된&amp;nbsp;함수를&amp;nbsp;이용 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;데이터를&amp;nbsp;추출하기&amp;nbsp;위해&amp;nbsp;class&amp;nbsp;생성&amp;nbsp;시&amp;nbsp;데이터를&amp;nbsp;대입&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777214815872&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;soup = bs(html_text, 'html.parser')
type(soup) # bs4.BeautifulSoup&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;find_all()&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;html 문서 안에서 특정 태그의 모든 정보를 출력 &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;결과값의 type은 ResultSet (TAG의 list형태)&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777214865455&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# versus find

soup.find('a')
# &amp;lt;a href=&quot;#topAsideButton&quot;&amp;gt;&amp;lt;span&amp;gt;상단영역 바로가기&amp;lt;/span&amp;gt;&amp;lt;/a&amp;gt;


soup.find_all('a')
# [&amp;lt;a href=&quot;#topAsideButton&quot;&amp;gt;&amp;lt;span&amp;gt;상단영역 바로가기&amp;lt;/span&amp;gt;&amp;lt;/a&amp;gt;,
#  &amp;lt;a href=&quot;#shortcutArea&quot;&amp;gt;&amp;lt;span&amp;gt;서비스 메뉴 바로가기&amp;lt;/span&amp;gt;&amp;lt;/a&amp;gt;,
#  &amp;lt;a href=&quot;#newsstand&quot;&amp;gt;&amp;lt;span&amp;gt;새소식 블록 바로가기&amp;lt;/span&amp;gt;&amp;lt;/a&amp;gt;,
#  &amp;lt;a href=&quot;#shopping&quot;&amp;gt;&amp;lt;span&amp;gt;쇼핑 블록 바로가기&amp;lt;/span&amp;gt;&amp;lt;/a&amp;gt;,
#  &amp;lt;a href=&quot;#feed&quot;&amp;gt;&amp;lt;span&amp;gt;관심사 블록 바로가기&amp;lt;/span&amp;gt;&amp;lt;/a&amp;gt;,
#  &amp;lt;a href=&quot;#account&quot;&amp;gt;&amp;lt;span&amp;gt;MY 영역 바로가기&amp;lt;/span&amp;gt;&amp;lt;/a&amp;gt;,
#  &amp;lt;a href=&quot;#widgetboard&quot;&amp;gt;&amp;lt;span&amp;gt;위젯 보드 바로가기&amp;lt;/span&amp;gt;&amp;lt;/a&amp;gt;,
#  &amp;lt;a href=&quot;#viewSetting&quot;&amp;gt;&amp;lt;span&amp;gt;보기 설정 바로가기&amp;lt;/span&amp;gt;&amp;lt;/a&amp;gt;]&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;+) find, find_all 함수는 TAG type에서는 사용 가능하지만, ResultSet type에서는 사용 불가능하다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;네이버 증권 사이트 크롤링&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #ee2323;&quot;&gt;미리 말하자면, 아래 코드는 `pd.read_html()` 한 줄로 대체되는 항목이다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777214943432&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import requests
from bs4 import BeautifulSoup as bs
import pandas as pd

result = requests.get('https://finance.naver.com')
soup = bs(result.text, 'html.parser')

div_data = soup.find('div', attrs = {'class': 'section_sise_top'})

tables = div_data.find_all('table', attrs = {'class': 'tbl_home'})

# ------------------------------------------------------------------

# 실행 때마다 변수명이 다른 df 생성

vari_num = 1

for table_data in tables:
    # thead 추출: columns
    head_data = table_data.find('thead')
    th_list = head_data.find_all('th')
    cols = [th.string for th in th_list]

    # tbody 추출: values
    body_data = table_data.find('tbody')
    tr_list = body_data.find_all('tr')

    values = []

    for tr in tr_list:
        row_list = tr.find_all(['th', 'td'])
        value_data = [data.get_text().strip() for data in row_list]
        values.append(value_data)

    globals()[f&quot;df{vari_num}&quot;] = pd.DataFrame(values, columns=cols)
    vari_num += 1&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;pd.read_html()&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;하지만 위 코드는 &lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; background-color: #dddddd; color: #000000; text-align: start;&quot;&gt;&amp;nbsp;pd.read_html()&amp;nbsp;&lt;/span&gt; 한 줄로 정리될 수 있다. &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #ee2323;&quot;&gt;이때, &lt;span style=&quot;background-color: #dddddd; color: #000000;&quot;&gt;&amp;nbsp;pd.read_html()&amp;nbsp;&lt;/span&gt; 은 테이블 형태에만 적용할 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777215015145&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;pd.read_html(str(div_data))

# os 에러가 나는 경우
from io import StringIO
pd.read_html(StringIO(str(div_data)), encoding='cp949')&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #fe6b00; font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;4월 22일 &amp;zwj;  &lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의 소감&lt;/b&gt;: 하루 했다고 html에 적응했나보다... 재밌다 &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  &lt;b&gt;크롤링 Review&lt;/b&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;a href=&quot;https://www.google.com/robots.txt&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://www.google.com/robots.txt&lt;/a&gt;&amp;nbsp;과&amp;nbsp;같이&amp;nbsp;robots.txt를&amp;nbsp;붙이면&amp;nbsp;크롤링&amp;nbsp;규칙이&amp;nbsp;나온다.&amp;nbsp;(Allow/Disallow) &lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;get_text()를&amp;nbsp;이용하는&amp;nbsp;걸&amp;nbsp;자꾸&amp;nbsp;까먹는데,&amp;nbsp;데이터&amp;nbsp;type에&amp;nbsp;대해&amp;nbsp;추가적인&amp;nbsp;공부가&amp;nbsp;필요할&amp;nbsp;듯&amp;nbsp;하다. &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;(어떤 type이 get_text() 사용이 가능한지, 어떤 함수를 쓰면 어떤 type이 되는지)&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;학교에서&amp;nbsp;파이썬&amp;nbsp;배울&amp;nbsp;때는&amp;nbsp;for문&amp;nbsp;나올&amp;nbsp;때마다&amp;nbsp;머리를&amp;nbsp;감싸쥐고&amp;nbsp;절규했었는데, &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이 과정에서는 마치 구세주라도 찾는 것인 양 for문을 마구 찾아 쓰고 있었다... (왜 반복문이 만능인지 제대로 체감 중 )&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  크롤링-Selenium 활용 &lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;너무너무&amp;nbsp;신기했던&amp;nbsp;Selenium!!! &amp;nbsp;다들&amp;nbsp;이렇게&amp;nbsp;매크로&amp;nbsp;만드는구나... &lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;s&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/s&gt;&lt;s&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/s&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;i&gt;&lt;s&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;다음 방켓팅 때 활용을&lt;/span&gt;&lt;/s&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-size=&quot;size16&quot; data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;라이브러리 설치, 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777957129765&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;!pip install selenium
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import re&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-size=&quot;size16&quot; data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;네이버 쇼핑 사이트 크롤링&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;네이버를&amp;nbsp;열고,&amp;nbsp;검색창에&amp;nbsp;검색하고,&amp;nbsp;enter를&amp;nbsp;누르는&amp;nbsp;것까지&amp;nbsp;모두&amp;nbsp;입력할&amp;nbsp;수&amp;nbsp;있다. &lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777957191369&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 웹 브라우저를 제어하기 위해 변수에 저장
driver = webdriver.Chrome()
driver.get(&quot;http://naver.com&quot;)

# 네이버 메인 화면에서 검색어를 입력하는 input Tag는 id가 query이다.
search_element = driver.find_element(By.ID, 'query')

# search_element &amp;rarr; 검색어 입력창을 의미
search_element.send_keys('아이폰')

# 검색어의 입력이 끝났으면 ENTER 키를 눌러서 검색을 시작
search_element.send_keys(Keys.ENTER)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이러면&amp;nbsp;갑자기&amp;nbsp;네이버에&amp;nbsp;접속해서&amp;nbsp;검색창에&amp;nbsp;아이폰이&amp;nbsp;입력되고&amp;nbsp;검색을&amp;nbsp;하는데&amp;nbsp;신세계였다...&amp;nbsp;이걸&amp;nbsp;내가&amp;nbsp;했다니&amp;nbsp;말도&amp;nbsp;안돼 &lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;i&gt;&lt;s&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;(팩트: 내가 한 게 아니다)&lt;/span&gt;&lt;/s&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777957749394&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 하이퍼링크의 콘텐츠 데이터가 '쇼핑'인 태그를 선택
len(driver.find_elements(By.LINK_TEXT, '쇼핑'))

shopping_button = driver.find_element(By.LINK_TEXT, '쇼핑')
# 쇼핑 버튼을 클릭한다.
shopping_button.click()&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리18.webp&quot; data-origin-width=&quot;718&quot; data-origin-height=&quot;400&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bmZ1xD/dJMcaiXsXGN/Fk0NUIzLrado1yt23rfonk/img.webp&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bmZ1xD/dJMcaiXsXGN/Fk0NUIzLrado1yt23rfonk/img.webp&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bmZ1xD/dJMcaiXsXGN/Fk0NUIzLrado1yt23rfonk/img.webp&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbmZ1xD%2FdJMcaiXsXGN%2FFk0NUIzLrado1yt23rfonk%2Fimg.webp&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;400&quot; height=&quot;223&quot; data-filename=&quot;티스토리18.webp&quot; data-origin-width=&quot;718&quot; data-origin-height=&quot;400&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;머릿속에서 계속 이게 생각났다... &lt;/span&gt;&lt;span style=&quot;color: #dddddd;&quot;&gt;&lt;i&gt;&lt;s&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;셀레니움아 자... 이게 클릭이야&lt;/span&gt;&lt;/s&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;원하는&amp;nbsp;화면이&amp;nbsp;나왔다면&amp;nbsp;python에&amp;nbsp;불러온다. &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1777957884303&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;html_data = driver.page_source
soup = bs(html_data, 'html.parser')


# 상품의 정보가 모두 들어있는 영역인 div 태그 중 id가 content인 태그를 추출
items_div = soup.find(
    'div',
    attrs = {
        'id': 'content'
    }
)

item_list = items_div.find_all(
    'div',
    attrs= {
        'class': re.compile('product_item')
    }
)

# 반복문을 이용하여 item_list의 상품명, 가격, 배송비 2차원 데이터로 생성

values = []

for item in item_list:
    
    # 상품명 추출
    item_name = item.find(
        'div',
        attrs = {
            'class': re.compile('product_title')
        }
    ).get_text()

    # 가격을 추출
    item_price = item.find(
        'span',
        attrs= {
            'class': 'price'
        }
    ).get_text()

    # 배송비 추출
    item_fee = item.find(
        'div',
        attrs= {
            'class' : re.compile('price_delivery_fee')
        }
    ).get_text()

    # 상품의 링크 주소를 추출
        # (상품명 추출해서 그 안에서 a 태그 찾고 href 속성 값 추출)
    name_tag = item.find(
        'div',
        attrs= {
            'class' : re.compile('product_title')
        }
    )
    item_url = name_tag.find('a')['href']

    # 상품명과 가격 배송비를 딕셔너리 형태로 생성하여 values에 추가
    values.append(
        {
            '상품명': item_name,
            '가격': item_price,
            '배송비': item_fee,
            'url': item_url
        }
    )

df = pd.DataFrame(values)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리19.png&quot; data-origin-width=&quot;1015&quot; data-origin-height=&quot;139&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cdOvcw/dJMcah5oj3z/dDc9ILwcZ9uo7mkwK3iuQk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cdOvcw/dJMcah5oj3z/dDc9ILwcZ9uo7mkwK3iuQk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cdOvcw/dJMcah5oj3z/dDc9ILwcZ9uo7mkwK3iuQk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcdOvcw%2FdJMcah5oj3z%2FdDc9ILwcZ9uo7mkwK3iuQk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1015&quot; height=&quot;139&quot; data-filename=&quot;티스토리19.png&quot; data-origin-width=&quot;1015&quot; data-origin-height=&quot;139&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;데이터프레임 형태로 만들 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  수집한 데이터를 mysql db server에 저장&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-size=&quot;size16&quot; data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;to_sql()&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777957991885&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from sqlalchemy import create_engine
engine = create_engine(
    &quot;서버 주소&quot;
)

df.to_sql(name='naver', con=engine, index=False)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #fe6b00;&quot;&gt;4월 23일 &amp;zwj; &lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;b&gt;오늘의&amp;nbsp;소감&lt;/b&gt;: 오랜만에 SQL 다루니까 얘가 정말 직관적이구나 싶다...&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;python 문법은 아직도 가끔 까먹을 때가 있는데 SQL 문법은 까먹지를 않는다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  Python과 SQL 문법 비교 &lt;/span&gt;&lt;/b&gt;&lt;/h3&gt;
&lt;blockquote data-ke-size=&quot;size16&quot; data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR'; color: #9d9d9d;&quot;&gt;데이터 로드&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt; &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;emp라는&amp;nbsp;csv&amp;nbsp;파일&amp;nbsp;load &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;20일에 만들어뒀던 db.py 파일을 이용하였다.&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;※&amp;nbsp;pandas로&amp;nbsp;슬라이싱하는&amp;nbsp;경우&amp;nbsp;인덱스&amp;nbsp;번호가&amp;nbsp;유지되고,&amp;nbsp;query문을&amp;nbsp;작성하면&amp;nbsp;인덱스가&amp;nbsp;새롭게&amp;nbsp;들어간다. &lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777958602807&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import pandas as pd
from db import MyDB
db = MyDB()


query_1 = &quot;&quot;&quot;
    SELECT * FROM `emp`
&quot;&quot;&quot;

emp_df = pd.DataFrame(db.sql_query(query_1))
emp_df&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리20.png&quot; data-origin-width=&quot;639&quot; data-origin-height=&quot;454&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/7qqKP/dJMcaja0Vxa/hs0ahfrLEakZ7eohDH7oFk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/7qqKP/dJMcaja0Vxa/hs0ahfrLEakZ7eohDH7oFk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/7qqKP/dJMcaja0Vxa/hs0ahfrLEakZ7eohDH7oFk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F7qqKP%2FdJMcaja0Vxa%2Fhs0ahfrLEakZ7eohDH7oFk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;343&quot; height=&quot;244&quot; data-filename=&quot;티스토리20.png&quot; data-origin-width=&quot;639&quot; data-origin-height=&quot;454&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;&amp;nbsp;pd.read_csv('../csv/emp.csv') &lt;/span&gt;&amp;nbsp;도 가능하다. &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;  데이터 필터링&lt;/b&gt;&lt;/h4&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt; SAL 컬럼의 데이터가 1500 이상인 사원의 모든 정보를 확인&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1777958790529&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 1
emp_df[emp_df['SAL'] &amp;gt;= 1500]

# 2
emp_df.loc[emp_df['SAL'] &amp;gt;= 1500, ]&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이때만 해도 1번 방식이 편했는데, col 조건식이 붙으니까 2번이 더 편했다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1777958805207&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 위 조건식을 Query로 만든다면

query_3 = &quot;&quot;&quot;
    SELECT *
    FROM `emp`
    WHERE SAL &amp;gt;= 1500
&quot;&quot;&quot;

emp_filter = db.sql_query(query_3)
pd.DataFrame(emp_filter)&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리21.png&quot; data-origin-width=&quot;632&quot; data-origin-height=&quot;277&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/GO3im/dJMcafmcsKq/SUia8lW1LIz4YxQHhHUrGk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/GO3im/dJMcafmcsKq/SUia8lW1LIz4YxQHhHUrGk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/GO3im/dJMcafmcsKq/SUia8lW1LIz4YxQHhHUrGk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FGO3im%2FdJMcafmcsKq%2FSUia8lW1LIz4YxQHhHUrGk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;413&quot; height=&quot;181&quot; data-filename=&quot;티스토리21.png&quot; data-origin-width=&quot;632&quot; data-origin-height=&quot;277&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;SAL이 1500 이상이고, JOB이 MANAGER인 사원의 사원번호와 이름을 출력&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1777958894845&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# pandas
flag_1 = emp_df['SAL'] &amp;gt;= 1500
flag_2 = emp_df['JOB'] == 'MANAGER'
col_flag = ['EMPNO', 'ENAME']

emp_df.loc[ flag_1 &amp;amp; flag_2, col_flag]


# sql query
query_4 = &quot;&quot;&quot;
    SELECT EMPNO, ENAME
    FROM `emp`
    WHERE SAL &amp;gt;= 1500 AND JOB = 'MANAGER'
&quot;&quot;&quot;

pd.DataFrame(db.sql_query(query_4))&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;SAL 1500 이상 2500 이하인 사원의 모든 정보&lt;/b&gt; 확인&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1777959671082&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# pandas 1
flag_3 = emp_df['SAL'] &amp;gt;= 1500
flag_4 = emp_df['SAL'] &amp;lt;= 2500
emp_df.loc[flag_3 &amp;amp; flag_4, ]

# pandas 2
emp_df.loc[
    emp_df['SAL'].isin(range(1500,2501)),
]

# pandas 3
emp_df.loc[
    emp_df['SAL'].between(1500, 2500)
]


# sql query
query_5 = &quot;&quot;&quot;
    SELECT *
    FROM `emp`
    WHERE SAL BETWEEN 1500 AND 2500
&quot;&quot;&quot;

pd.DataFrame(db.sql_query(query_5))&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;모두&amp;nbsp;동일한&amp;nbsp;결과값이&amp;nbsp;추출된다. &lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리22.png&quot; data-origin-width=&quot;629&quot; data-origin-height=&quot;130&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/zigCl/dJMcaciHqlr/NtYRVsv1L3kzuVyoNLXWFk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/zigCl/dJMcaciHqlr/NtYRVsv1L3kzuVyoNLXWFk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/zigCl/dJMcaciHqlr/NtYRVsv1L3kzuVyoNLXWFk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FzigCl%2FdJMcaciHqlr%2FNtYRVsv1L3kzuVyoNLXWFk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;469&quot; height=&quot;97&quot; data-filename=&quot;티스토리22.png&quot; data-origin-width=&quot;629&quot; data-origin-height=&quot;130&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;JOB이 MANAGER 이거나 SALESMAN인 모든 사원 정보를 확인&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1777959774087&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# pandas 1
flag_5 = emp_df['JOB'] == 'MANAGER'
flag_6 = emp_df['JOB'] == 'SALESMAN'
emp_df.loc[flag_5 | flag_6, ]

# pandas 2
flag_7 = emp_df['JOB'].isin(['MANAGER', 'SALESMAN'])
emp_df.loc[flag_7,]

# sql query
query_6 = &quot;&quot;&quot;
    SELECT *
    FROM `emp`
    WHERE JOB in ('MANAGER', 'SALESMAN')
&quot;&quot;&quot;
pd.DataFrame(db.sql_query(query_6))&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;역시 동일한 결과가 추출된다. &lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리23.png&quot; data-origin-width=&quot;630&quot; data-origin-height=&quot;248&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/3iKy4/dJMcahEhms2/lJ2KHWOxk6ieKpOa7rYheK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/3iKy4/dJMcahEhms2/lJ2KHWOxk6ieKpOa7rYheK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/3iKy4/dJMcahEhms2/lJ2KHWOxk6ieKpOa7rYheK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F3iKy4%2FdJMcahEhms2%2FlJ2KHWOxk6ieKpOa7rYheK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;401&quot; height=&quot;158&quot; data-filename=&quot;티스토리23.png&quot; data-origin-width=&quot;630&quot; data-origin-height=&quot;248&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #333333; text-align: start;&quot;&gt;계속&amp;nbsp;pd.DataFrame&amp;nbsp;쓰기&amp;nbsp;귀찮아서&amp;nbsp;함수를&amp;nbsp;만들었다.&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;pre id=&quot;code_1777960067475&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;def sql(query):
    result = pd.DataFrame(
        db.sql_query(query)
    )

    return result&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;사원의 이름이 S로 시작하는 사람, S가 포함된 사람 추출&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1777960220100&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# pandas 1: for문

# s로 시작하는 사람
word = 's'
flag_list = []
for name in emp_df['ENAME']:
    flag_list.append(
        name.lower().startswith(word.lower())
    )
emp_df.loc[flag_list,]

# S로 끝나는 경우
        # name.lower().endswith(word.lower())
        
        # S를 포함하는 경우
        # word.lower() in name.lower()

# -----------------------------------------------------

# pandas 2: for문 없이

# s로 시작하는 사람
word = 's'
flag_8 = emp_df['ENAME'].str.lower().str.startswith(word.lower())
emp_df.loc[flag_8,]
# s로 끝나는 경우
                                        # .endswith
# s를 포함하는 경우
                                        # .contains


# -----------------------------------------------------

# sql query
# pandas가 아니라 DB 드라이버(예: pymysql)가 쿼리 문자열의 %를 포맷 문자로 해석할 수 있기에, LIKE의 %는 %%로 써야 한다

# s로 시작하는 사람
query_7 = &quot;&quot;&quot;
    SELECT *
    FROM `multicam`.`emp`
    WHERE ENAME LIKE &quot;s%%&quot;
&quot;&quot;&quot;
# S가 포함되는 경우
    # WHERE ENAME LIKE &quot;%%s%%&quot;

sql(query_7)&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #333333; text-align: start;&quot;&gt;&amp;nbsp;s로&amp;nbsp;시작하는&amp;nbsp;경우&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리24.png&quot; data-origin-width=&quot;614&quot; data-origin-height=&quot;101&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bX934J/dJMcad2U6fg/Mc8L7qATSkzN8MxjhFYWTk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bX934J/dJMcad2U6fg/Mc8L7qATSkzN8MxjhFYWTk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bX934J/dJMcad2U6fg/Mc8L7qATSkzN8MxjhFYWTk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbX934J%2FdJMcad2U6fg%2FMc8L7qATSkzN8MxjhFYWTk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;340&quot; height=&quot;56&quot; data-filename=&quot;티스토리24.png&quot; data-origin-width=&quot;614&quot; data-origin-height=&quot;101&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #333333; text-align: start;&quot;&gt;s를 포함하는 경우&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리25.png&quot; data-origin-width=&quot;628&quot; data-origin-height=&quot;187&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/DZDuS/dJMcabYoXt6/n0t8fWxk731p327UwxS8o0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/DZDuS/dJMcabYoXt6/n0t8fWxk731p327UwxS8o0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/DZDuS/dJMcabYoXt6/n0t8fWxk731p327UwxS8o0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FDZDuS%2FdJMcabYoXt6%2Fn0t8fWxk731p327UwxS8o0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;359&quot; height=&quot;107&quot; data-filename=&quot;티스토리25.png&quot; data-origin-width=&quot;628&quot; data-origin-height=&quot;187&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&amp;nbsp;python, sql 모두 동일한 결과가 추출된다. &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  데이터 결합 &lt;/span&gt;&lt;/b&gt;&lt;/h4&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;join 결합&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt; &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;emp table과 dept table을 DEPTNO라는 column을 통해 결합&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777960497705&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# pandas
pd.merge(emp_df, dept_df, on='DEPTNO', how='left')&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리26.png&quot; data-origin-width=&quot;837&quot; data-origin-height=&quot;460&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/mAd4U/dJMcaipGqL7/1erfXP7cysVenY4iTvRl11/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/mAd4U/dJMcaipGqL7/1erfXP7cysVenY4iTvRl11/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/mAd4U/dJMcaipGqL7/1erfXP7cysVenY4iTvRl11/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FmAd4U%2FdJMcaipGqL7%2F1erfXP7cysVenY4iTvRl11%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;426&quot; height=&quot;234&quot; data-filename=&quot;티스토리26.png&quot; data-origin-width=&quot;837&quot; data-origin-height=&quot;460&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;pre id=&quot;code_1777960849903&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# SQL

query_8 = &quot;&quot;&quot;
    SELECT *
    FROM emp
        LEFT JOIN dept
            ON emp.DEPTNO = dept.DEPTNO
&quot;&quot;&quot;

sql(query_8)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리27.png&quot; data-origin-width=&quot;947&quot; data-origin-height=&quot;455&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/b2JhTX/dJMcaiwp3qX/ki9L3Y98Dpbb4bBPurCKnk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/b2JhTX/dJMcaiwp3qX/ki9L3Y98Dpbb4bBPurCKnk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/b2JhTX/dJMcaiwp3qX/ki9L3Y98Dpbb4bBPurCKnk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb2JhTX%2FdJMcaiwp3qX%2Fki9L3Y98Dpbb4bBPurCKnk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;448&quot; height=&quot;215&quot; data-filename=&quot;티스토리27.png&quot; data-origin-width=&quot;947&quot; data-origin-height=&quot;455&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;pandas에서 column이 다를 때 join&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1777961470973&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;dept_df2 = dept_df.copy()
dept_df2.rename(
    columns={
        'DEPTNO' : 'DEPTNUM'
    }, inplace=True
)&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이후 아까 한 것과 같이 &lt;span style=&quot;background-color: #dddddd;&quot;&gt;&amp;nbsp;pd.merge(emp_df, dept_df2, on='DEPTNO', how='left') &lt;/span&gt;를 하게 되면 ERROR가 발생한다. &lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;이&amp;nbsp;경우&amp;nbsp;결합하고자&amp;nbsp;하는&amp;nbsp;데이터프레임의&amp;nbsp;key를&amp;nbsp;모두&amp;nbsp;써주면&amp;nbsp;결합에&amp;nbsp;성공한다. &lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777961509184&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;pd.merge(
    emp_df, dept_df2, left_on='DEPTNO', right_on='DEPTNUM', how='left'
)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리28.png&quot; data-origin-width=&quot;927&quot; data-origin-height=&quot;454&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/nCnrc/dJMcacC03w8/mHqExuekf2tUnkp8ekmTm1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/nCnrc/dJMcacC03w8/mHqExuekf2tUnkp8ekmTm1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/nCnrc/dJMcacC03w8/mHqExuekf2tUnkp8ekmTm1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FnCnrc%2FdJMcacC03w8%2FmHqExuekf2tUnkp8ekmTm1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;394&quot; height=&quot;193&quot; data-filename=&quot;티스토리28.png&quot; data-origin-width=&quot;927&quot; data-origin-height=&quot;454&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;union&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;tran_1과&amp;nbsp;tran_2&amp;nbsp;table을&amp;nbsp;union&amp;nbsp;결합 &lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777961638747&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# sql
query_9 = &quot;&quot;&quot;
    SELECT * FROM `tran_1`
    UNION
    SELECT * FROM `tran_2`
&quot;&quot;&quot;
sql(query_9)


# pandas
tran_1 = pd.read_csv('../csv/tran_1.csv')
tran_2 = pd.read_csv('../csv/tran_2.csv')
tran_d1 = pd.read_csv('../csv/tran_d_1.csv')
tran_d2 = pd.read_csv('../csv/tran_d_2.csv')

pd.concat([tran_1, tran_2], axis=0, ignore_index=True)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리29.png&quot; data-origin-width=&quot;497&quot; data-origin-height=&quot;402&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/DxwZ0/dJMcabRB1Hy/5wXazbTqMcThGttiQmcoo0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/DxwZ0/dJMcabRB1Hy/5wXazbTqMcThGttiQmcoo0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/DxwZ0/dJMcabRB1Hy/5wXazbTqMcThGttiQmcoo0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FDxwZ0%2FdJMcabRB1Hy%2F5wXazbTqMcThGttiQmcoo0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;284&quot; height=&quot;230&quot; data-filename=&quot;티스토리29.png&quot; data-origin-width=&quot;497&quot; data-origin-height=&quot;402&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;&amp;nbsp;ignore_index = True&amp;nbsp;&lt;/span&gt; 때문에 index가 0부터 순서대로 다시 매겨져 동일한 모습으로 나타난다. 단, SQL의 UNION은 중복된 행을 제거하므로, 두 테이블에 중복 행이 있다면 UNION ALL을 쓰거나 concat 뒤에 drop_duplicates()를 적용해야 결과가 일치한다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;join: 중복된 키 select&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;잠시 다시 join으로 돌아와서, join을 수행한 key를 select하는 과정에 대해 풀이한다.&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;transaction_id를&amp;nbsp;동일한&amp;nbsp;key로&amp;nbsp;가지는&amp;nbsp;tran_1과&amp;nbsp;tran_d_1&amp;nbsp;테이블을&amp;nbsp;조인&amp;nbsp;결합한다.&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1777961756765&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;query_11 = &quot;&quot;&quot;
    SELECT
        transaction_id,
        price,
        payment_date,
        quantity
    FROM
    `tran_1` `t1`
    LEFT JOIN
    `tran_d_1` `td1`
    ON
    t1.transaction_id = td1.transaction_id
&quot;&quot;&quot;

sql(query_11)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리30.png&quot; data-origin-width=&quot;494&quot; data-origin-height=&quot;97&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cwOv6n/dJMcaicaDGF/CgMLid8O37Z5hBwMHCykR0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cwOv6n/dJMcaicaDGF/CgMLid8O37Z5hBwMHCykR0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cwOv6n/dJMcaicaDGF/CgMLid8O37Z5hBwMHCykR0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcwOv6n%2FdJMcaicaDGF%2FCgMLid8O37Z5hBwMHCykR0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;351&quot; height=&quot;69&quot; data-filename=&quot;티스토리30.png&quot; data-origin-width=&quot;494&quot; data-origin-height=&quot;97&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;transaction_id가 두 테이블에 모두 존재하여 어느 테이블의 column인지 모호하다(ambiguous)는 에러가 발생한다. 해당 문제를 해결하려면 select문에 있는 transaction_id를 어느 테이블에서 가져올 것인지를 명시해야 한다. &lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1777961815984&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# tran_1과 tran_d_1 테이블을 조인 결합

query_11 = &quot;&quot;&quot;
    SELECT
        t1.transaction_id,
        price,
        payment_date,
        quantity
    FROM
    `tran_1` `t1`
    LEFT JOIN
    `tran_d_1` `td1`
    ON
    t1.transaction_id = td1.transaction_id
&quot;&quot;&quot;

sql(query_11)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리31.png&quot; data-origin-width=&quot;489&quot; data-origin-height=&quot;409&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ZwpAf/dJMcacwfEHr/yQCfbyDNZsBrPOP3qKycUK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ZwpAf/dJMcacwfEHr/yQCfbyDNZsBrPOP3qKycUK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ZwpAf/dJMcacwfEHr/yQCfbyDNZsBrPOP3qKycUK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FZwpAf%2FdJMcacwfEHr%2FyQCfbyDNZsBrPOP3qKycUK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;273&quot; height=&quot;228&quot; data-filename=&quot;티스토리31.png&quot; data-origin-width=&quot;489&quot; data-origin-height=&quot;409&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #9d9d9d;&quot;&gt;그나저나 as 없이도 이름 변경 가능한 거 충격이다... 적응하면 편할 것 같긴 한데 매번 as 써왔던 입장에서는 어색하달까...&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;  데이터 정렬 &lt;/span&gt;&lt;/b&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-size=&quot;size16&quot; data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;ENAME을 기준으로 내림차순 정렬&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1778113583795&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# sql
query_12 = &quot;&quot;&quot;
    SELECT * FROM
    `emp` ORDER BY ENAME DESC
&quot;&quot;&quot;
sql(query_12)


# pandas
emp_df.sort_values('ENAME', ascending=False)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리32.png&quot; data-origin-width=&quot;638&quot; data-origin-height=&quot;452&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/77QN6/dJMcaad81fY/PwVGBMw4fcAzMQ43BKVSgK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/77QN6/dJMcaad81fY/PwVGBMw4fcAzMQ43BKVSgK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/77QN6/dJMcaad81fY/PwVGBMw4fcAzMQ43BKVSgK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F77QN6%2FdJMcaad81fY%2FPwVGBMw4fcAzMQ43BKVSgK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;368&quot; height=&quot;261&quot; data-filename=&quot;티스토리32.png&quot; data-origin-width=&quot;638&quot; data-origin-height=&quot;452&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;  그룹화&lt;/b&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-size=&quot;size16&quot; data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;JOB으로 그룹화하여 SAL의 통계량 추출&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1778113892737&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# sql
# pandas와 다르게 SELECT 문에 그룹화 연산을 적어주어야 함.

query_13 = &quot;&quot;&quot;
    SELECT
        JOB,
        AVG(SAL) AS SAL_AVG,
        SUM(SAL) AS SAL_SUM
    FROM `emp`
    GROUP BY JOB
&quot;&quot;&quot;
sql(query_13)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리33.png&quot; data-origin-width=&quot;306&quot; data-origin-height=&quot;185&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cHOqvB/dJMcahj3HIg/sdZxM1mqRfK29HvE2UtYz1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cHOqvB/dJMcahj3HIg/sdZxM1mqRfK29HvE2UtYz1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cHOqvB/dJMcahj3HIg/sdZxM1mqRfK29HvE2UtYz1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcHOqvB%2FdJMcahj3HIg%2FsdZxM1mqRfK29HvE2UtYz1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;218&quot; height=&quot;132&quot; data-filename=&quot;티스토리33.png&quot; data-origin-width=&quot;306&quot; data-origin-height=&quot;185&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;pre id=&quot;code_1778113993603&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# pandas
emp_df[ ['JOB', 'SAL'] ].groupby('JOB').agg(['mean', 'sum', 'count'])&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리34.png&quot; data-origin-width=&quot;322&quot; data-origin-height=&quot;250&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/duff9h/dJMcacbWzG5/FP0MrydSZFA1iP9okDx5M1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/duff9h/dJMcacbWzG5/FP0MrydSZFA1iP9okDx5M1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/duff9h/dJMcacbWzG5/FP0MrydSZFA1iP9okDx5M1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fduff9h%2FdJMcacbWzG5%2FFP0MrydSZFA1iP9okDx5M1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;222&quot; height=&quot;172&quot; data-filename=&quot;티스토리34.png&quot; data-origin-width=&quot;322&quot; data-origin-height=&quot;250&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;형태는&amp;nbsp;조금&amp;nbsp;다르지만&amp;nbsp;값은&amp;nbsp;동일하게&amp;nbsp;나오는&amp;nbsp;것을&amp;nbsp;볼&amp;nbsp;수&amp;nbsp;있다.&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;값들의 정렬도 조금 다른데, 이는 SQL 쿼리문에서 ORDER BY를 통해 동일하게 만들 수 있다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt; &amp;nbsp;서브쿼리&lt;/span&gt;&lt;/b&gt;&lt;/h4&gt;
&lt;blockquote data-ke-size=&quot;size16&quot; data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;&lt;b&gt;부서의 지역이 NEW YORK, CHICAGO인 부서의 번호를 받아서 사원의 정보를 확인&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778114342763&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# sql
query_16 = &quot;&quot;&quot;
    SELECT *
    FROM `emp`
    WHERE DEPTNO IN
    (
        SELECT DEPTNO
        FROM `dept`
        WHERE LOC IN ('NEW YORK', 'CHICAGO')
    )
&quot;&quot;&quot;
sql(query_16)


# pandas
flag_9 = dept_df.loc[
    dept_df['LOC'].isin( ['NEW YORK', &quot;CHICAGO&quot;] ), 'DEPTNO'
]
emp_df.loc[emp_df['DEPTNO'].isin(flag_9), ]&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리35.png&quot; data-origin-width=&quot;632&quot; data-origin-height=&quot;308&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ZzHZG/dJMcad2WexH/c5XPekb9pzIGnWi6vZbAB1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ZzHZG/dJMcad2WexH/c5XPekb9pzIGnWi6vZbAB1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ZzHZG/dJMcad2WexH/c5XPekb9pzIGnWi6vZbAB1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FZzHZG%2FdJMcad2WexH%2Fc5XPekb9pzIGnWi6vZbAB1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;482&quot; height=&quot;235&quot; data-filename=&quot;티스토리35.png&quot; data-origin-width=&quot;632&quot; data-origin-height=&quot;308&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;둘&amp;nbsp;다&amp;nbsp;두&amp;nbsp;번의&amp;nbsp;작업을&amp;nbsp;거친다는&amp;nbsp;것에서&amp;nbsp;공통점이&amp;nbsp;있다.&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;pandas의 경우, flag_9에 담긴 코드를 &lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;두번째 코드의 flag_9 자리에 그대로 써놓으면 &lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;sql과&amp;nbsp;비슷하게&amp;nbsp;서브쿼리&amp;nbsp;느낌의&amp;nbsp;코드가&amp;nbsp;될&amp;nbsp;것이다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;  HAVING&lt;/b&gt;&lt;/h4&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;직종에서 ANALYST를 제외하고 직종별로 그룹화를 하여&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span style=&quot;color: #9d9d9d;&quot;&gt;SAL의 평균이 2500 이상인 데이터에서 평균 SAL이 높은 순서대로 출력&lt;/span&gt;&lt;/b&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1778114727859&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# sql

query_17 = &quot;&quot;&quot;
    SELECT
        JOB,
        AVG(SAL) SAL_AVG
    FROM `emp`
    WHERE JOB != 'ANALYST'
    GROUP BY JOB
    HAVING SAL_AVG &amp;gt;= 2500
    ORDER BY SAL_AVG DESC
    
&quot;&quot;&quot;
sql(query_17)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1778114783501&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# pandas

df = emp_df.loc[
    emp_df['JOB'] != 'ANALYST'
]
group_df = df[ ['JOB', 'SAL'] ].groupby('JOB').mean()
group_df = group_df.loc[
    group_df['SAL'] &amp;gt;= 2500
]
group_df.sort_values('SAL', ascending=False).reset_index()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리36.png&quot; data-origin-width=&quot;221&quot; data-origin-height=&quot;98&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/b3Tmrt/dJMb99TOCP3/0DtKIPhYZO4krAHi7FXLT1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/b3Tmrt/dJMb99TOCP3/0DtKIPhYZO4krAHi7FXLT1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/b3Tmrt/dJMb99TOCP3/0DtKIPhYZO4krAHi7FXLT1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb3Tmrt%2FdJMb99TOCP3%2F0DtKIPhYZO4krAHi7FXLT1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;171&quot; height=&quot;76&quot; data-filename=&quot;티스토리36.png&quot; data-origin-width=&quot;221&quot; data-origin-height=&quot;98&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR'; color: #fe6b00; text-align: start;&quot;&gt; 3주차&amp;nbsp;소감&lt;/span&gt;&lt;/b&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;오랜만에&amp;nbsp;SQL을&amp;nbsp;다루니까&amp;nbsp;반가웠고,&amp;nbsp;궁금했던&amp;nbsp;크롤링에&amp;nbsp;대해&amp;nbsp;알&amp;nbsp;수&amp;nbsp;있어&amp;nbsp;시간&amp;nbsp;가는&amp;nbsp;줄&amp;nbsp;몰랐던&amp;nbsp;주차였다.&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;역시 익숙한 것은 쉽고, 새로운 것은 어렵다. &lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;하지만&amp;nbsp;새로운&amp;nbsp;것을&amp;nbsp;마주하는&amp;nbsp;것에&amp;nbsp;익숙해져야겠지.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;다음&amp;nbsp;주는&amp;nbsp;시각화다.&amp;nbsp;맷플롯립,&amp;nbsp;구글&amp;nbsp;애널리틱스부터&amp;nbsp;태블로까지!!&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;정말 필요로 했던 부분이자 나의 취약점이다. &lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Demilight', 'Noto Sans KR';&quot;&gt;완전히&amp;nbsp;내&amp;nbsp;것으로&amp;nbsp;만든다는&amp;nbsp;생각으로&amp;nbsp;열심히&amp;nbsp;하자!&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;티스토리37.jpg&quot; data-origin-width=&quot;300&quot; data-origin-height=&quot;168&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/5Zui6/dJMcab5aDH4/ZaaYwZYq2boChKurAkGgRk/img.jpg&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/5Zui6/dJMcab5aDH4/ZaaYwZYq2boChKurAkGgRk/img.jpg&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/5Zui6/dJMcab5aDH4/ZaaYwZYq2boChKurAkGgRk/img.jpg&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F5Zui6%2FdJMcab5aDH4%2FZaaYwZYq2boChKurAkGgRk%2Fimg.jpg&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;300&quot; height=&quot;168&quot; data-filename=&quot;티스토리37.jpg&quot; data-origin-width=&quot;300&quot; data-origin-height=&quot;168&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>멀티캠퍼스부트캠프</category>
      <category>데이터 분석가 부트캠프</category>
      <category>부트캠프</category>
      <author>가라어퍼</author>
      <guid isPermaLink="true">https://bbgw-oshoulder.tistory.com/3</guid>
      <comments>https://bbgw-oshoulder.tistory.com/3#entry3comment</comments>
      <pubDate>Sun, 26 Apr 2026 19:14:30 +0900</pubDate>
    </item>
  </channel>
</rss>