Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Grunfeld (1950) Investment Data""" 

2import pandas as pd 

3 

4from statsmodels.datasets import utils as du 

5 

# Docstring markup format declared for this module.
6__docformat__ = 'restructuredtext' 

7 

# Licensing statement for the dataset.
8COPYRIGHT = """This is public domain.""" 

# The title is the module docstring itself.
9TITLE = __doc__ 

# Provenance text: where the data came from, plus a link to a note on the
# different versions of the Grunfeld data in circulation.
10SOURCE = """This is the Grunfeld (1950) Investment Data. 

11 

12The source for the data was the original 11-firm data set from Grunfeld's Ph.D. 

13thesis recreated by Kleiber and Zeileis (2008) "The Grunfeld Data at 50". 

14The data can be found here. 

15http://statmath.wu-wien.ac.at/~zeileis/grunfeld/ 

16 

17For a note on the many versions of the Grunfeld data circulating see: 

18http://www.stanford.edu/~clint/bench/grunfeld.htm 

19""" 

20 

# One-line description of the dataset.
21DESCRSHORT = """Grunfeld (1950) Investment Data for 11 U.S. Firms.""" 

22 

# The long description is bound to the same string as the short one.
23DESCRLONG = DESCRSHORT 

24 

# Reference notes: observation count, variable count, and a definition for
# each variable, including the remark that raw_data carries firm dummies.
25NOTE = """:: 

26 

27 Number of observations - 220 (20 years for 11 firms) 

28 

29 Number of variables - 5 

30 

31 Variables name definitions:: 

32 

33 invest - Gross investment in 1947 dollars 

34 value - Market value as of Dec. 31 in 1947 dollars 

35 capital - Stock of plant and equipment in 1947 dollars 

36 firm - General Motors, US Steel, General Electric, Chrysler, 

37 Atlantic Refining, IBM, Union Oil, Westinghouse, Goodyear, 

38 Diamond Match, American Steel 

39 year - 1935 - 1954 

40 

41 Note that raw_data has firm expanded to dummy variables, since it is a 

42 string categorical variable. 

43""" 

44 

def load(as_pandas=None):
    """
    Load the Grunfeld data and return it as a Dataset.

    The data is first loaded through ``load_pandas`` and the result is then
    handed to ``du.as_numpy_dataset`` together with ``as_pandas``.

    Parameters
    ----------
    as_pandas : bool, optional
        Flag indicating whether to return pandas DataFrames and Series
        or numpy recarrays and arrays. If True, returns pandas. The value
        (including the default ``None``) is forwarded unchanged to
        ``du.as_numpy_dataset``, which decides how to interpret it.

    Returns
    -------
    Dataset
        See DATASET_PROPOSAL.txt for more information.

    Notes
    -----
    raw_data has the firm variable expanded to dummy variables for each
    firm (i.e., there is no reference dummy).
    """
    pandas_dataset = load_pandas()
    return du.as_numpy_dataset(pandas_dataset, as_pandas=as_pandas)

66 

def load_pandas():
    """
    Load the Grunfeld data and return a pandas-backed Dataset.

    Returns
    -------
    Dataset
        The object produced by ``du.process_pandas`` (called with
        ``endog_idx=0``), with an extra ``raw_data`` attribute attached.
        See DATASET_PROPOSAL.txt for more information.

    Notes
    -----
    The ``year`` column is cast to float before anything else is built.

    ``raw_data`` is ``pd.get_dummies`` applied to the whole frame, so the
    firm variable is expanded to one dummy column per firm (i.e., there is
    no reference dummy).
    """
    frame = _get_data()
    frame.year = frame.year.astype(float)

    # Build the dummy-expanded copy before handing the frame to
    # process_pandas, keeping the original order of operations.
    firm_dummies = pd.get_dummies(frame)

    dataset = du.process_pandas(frame, endog_idx=0)
    dataset.raw_data = firm_dummies
    return dataset

87 

88 

def _get_data():
    """Return the result of ``du.load_csv`` for ``grunfeld.csv``.

    The location of this module (``__file__``) is passed along with the
    file name; the loaded object is returned without modification.
    """
    return du.load_csv(__file__, 'grunfeld.csv')

91 return data