본문 바로가기

Machine Learning(머신러닝)

부동산 거래량과 거래 금액 추이

부동산 거래량

In [102]:
#-*- encoding: utf8 -*-
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import unicodedata
In [103]:
import matplotlib
In [104]:
# Render matplotlib text with the 'AppleGothic' font family.
# NOTE(review): presumably chosen so the Korean plot titles used later display
# correctly; AppleGothic looks macOS-specific — confirm on other platforms.
matplotlib.rc('font', family='AppleGothic')
In [193]:
def load_data(path):
    """Load every ``*.csv`` file directly inside *path* into DataFrames.

    Each file is read with ``pd.read_csv``. Its column names are normalized to
    Unicode NFC, and the '거래금액(만원)' column is converted from
    comma-grouped text (e.g. ``"139,500"``) to a numeric dtype.

    Args:
        path: Directory searched (non-recursively) for ``*.csv`` files.

    Returns:
        dict mapping the NFC-normalized part of each file name that precedes
        its first underscore to that file's DataFrame. Empty when no CSV file
        matches. Files sharing the same prefix overwrite one another.
    """
    ret = {}

    for fname in glob.glob(os.path.join(path, '*.csv')):
        dft = pd.read_csv(fname)
        # Normalize to NFC so the composed-Hangul literals below match.
        # NOTE(review): presumably the input can arrive in decomposed (NFD)
        # form — confirm against the data source.
        dft.columns = dft.columns.str.normalize('NFC')
        price_text = dft['거래금액(만원)'].str.replace(',', '')
        dft['거래금액(만원)'] = pd.to_numeric(price_text)

        # Derive the key from the file name alone. Splitting the whole path on
        # '_' first picks up underscores in directory names, and a hard-coded
        # '/' separator is not portable.
        dname = os.path.basename(fname).split('_')[0]
        ret[unicodedata.normalize('NFC', dname)] = dft
    return ret
In [194]:
# Load every CSV found under ./trade/ into a dict of DataFrames, keyed by the
# name load_data() derives from each file name.
df_dict = load_data('./trade/')
In [195]:
# Preview the first three rows of the '서울' entry.
df_dict['서울'].head(3)
Out[195]:
시군구번지본번부번단지명전용면적(㎡)계약년월계약일거래금액(만원)건축년도도로명
0서울특별시 강남구 개포동658-16581개포6차우성아파트1동~8동79.9720180311~2013950021987언주로
1서울특별시 강남구 개포동658-16581개포6차우성아파트1동~8동54.982018041~1010750051987언주로
2서울특별시 강남구 개포동658-16581개포6차우성아파트1동~8동79.9720180621~3014500031987언주로
In [117]:
def cal_volume(data_dict):
    """Count rows per '계약년월' value for every DataFrame in *data_dict*.

    Args:
        data_dict: Mapping of key -> DataFrame. Each frame must have the
            columns '계약년월' and '시군구'.

    Returns:
        DataFrame with one column per key holding the number of non-null
        '시군구' values in each '계약년월' group. The index is the union of
        the '계약년월' values, parsed as ``%Y%m`` into datetimes.
    """
    monthly_counts = {
        name: frame.groupby('계약년월')['시군구'].count()
        for name, frame in data_dict.items()
    }

    volume = pd.DataFrame(monthly_counts)
    volume.index = pd.to_datetime(volume.index, format='%Y%m')
    return volume
In [181]:
def cal_mean(data_dict):
    """Average '거래금액(만원)' per '계약년월' for every DataFrame given.

    Args:
        data_dict: Mapping of key -> DataFrame. Each frame must have the
            columns '계약년월' and a numeric '거래금액(만원)'.

    Returns:
        DataFrame with one column per key holding the mean of
        '거래금액(만원)' in each '계약년월' group. The index is the union of
        the '계약년월' values, parsed as ``%Y%m`` into datetimes.
    """
    data = {
        key: dft.groupby('계약년월')['거래금액(만원)'].mean()
        for key, dft in data_dict.items()
    }
    df_mean = pd.DataFrame(data)
    # Convert this frame's own index rather than reading a module-level frame,
    # so the function works regardless of which notebook cells ran before it.
    df_mean.index = pd.to_datetime(df_mean.index, format='%Y%m')

    return df_mean
In [182]:
# Per-month mean of '거래금액(만원)' for each entry of df_dict.
dfm = cal_mean(df_dict)
In [183]:
# Display the table of monthly means.
dfm
Out[183]:
            경기도           광주           대구           대전           서울           세종           제주
계약년월
2018-03-01  36157.297062  21204.754641  29619.066841  22587.752541  58728.301032  32115.321503  26638.266094
2018-04-01  34924.966615  21097.412190  28412.400000  22638.452815  59856.125975  31344.019011  23813.879808
2018-05-01  34452.957298  21713.175413  28147.343570  22624.980960  58969.572067  31319.115502  23434.927203
2018-06-01  35404.030386  22061.336989  24812.815326  22865.259669  60210.091309  31707.827004  25095.180412
2018-07-01  37705.976067  21510.797667  28311.549089  23717.254561  69906.172453  31106.840909  26684.820988
2018-08-01  43247.272887  23409.422186  30673.880118  27009.363918  71688.794253  31176.224880  29121.371429
2018-09-01  36653.899023  22592.559847  31996.980649  26715.617507  70335.151242  32985.492537  28613.664596
2018-10-01  34506.451154  20914.076187  29582.405161  26299.346375  65617.934763  31874.184314  26417.256917
2018-11-01  33816.169203  20567.570674  26999.507293  25586.329739  64199.914607  32410.860656  27265.452991
2018-12-01  33667.329350  20395.169421  27599.829969  22437.410328  61598.975578  29167.441860  26439.788462
2019-01-01  33166.544470  19968.007777  25912.953277  23271.179468  61272.517178  30323.490909  25148.101695
2019-02-01  31301.967317  18640.096558  23383.787152  21913.495413  54783.578801  27449.398773  22897.721519
In [191]:
# Divide every row by the first row so each column is expressed relative to
# its first-row value, then plot the result.
dfm2 = dfm / dfm.iloc[0]
dfm2.plot(figsize=(15,8), title='평균 거래 금액')
Out[191]:
<matplotlib.axes._subplots.AxesSubplot at 0x1113437f0>
In [188]:
# Per-month row counts for each entry of df_dict.
df_count = cal_volume(df_dict)
In [192]:
# Divide every row by the first row so each column is expressed relative to
# its first-row value, then plot the result.
df_count_nor = df_count / df_count.iloc[0]
df_count_nor.plot(figsize=(15,8), title='거래량')
Out[192]:
<matplotlib.axes._subplots.AxesSubplot at 0x113e39b00>