Example 12.Python API




Python API的安装


在已经安装了Python开发环境的电脑上,将DolphinDB提供的API安装包解压至任意目录下。进入该目录,确认setup.py文件的存在。然后运行以下命令就可以完成安装。



python setup.py install



我们可以使用以下命令更新已经安装的Python API:



python setup.py install --force



Python API的功能


我们可以在Python使用session方法创建DolphinDB连接对象,使用connect方法创建与DolphinDB数据库的连接。



import pandas as pd

import numpy as np

import dolphindb as ddb


conn=ddb.session()

conn.connect( 'localhost', 8848)



在下列例子中,我们连接正在运行的端口号为8848的本地DolphinDB服务器:


对于Python中的DolphinDB连接对象,我们提供以下方法:




>>> df = pd.DataFrame({'id': np.int32([1, 2, 3, 4, 3]), 'value':  np.double([7.8, 4.6, 5.1, 9.6, 0.1]), 'x': np.int32([5, 4, 3, 2, 1])})

>>> conn.upload({'t1': df})






>>> conn.run('wavg', [100, 60, 300], [1, 1.5, 2])

164.0





>>> df = pd.DataFrame({'id': np.int32([1, 2, 3, 4, 3]), 'value':  np.double([7.8, 4.6, 5.1, 9.6, 0.1]), 'x': np.int32([5, 4, 3, 2, 1])})

>>> conn.upload({'t1': df})

>>> conn.run('t1.value.avg()')

5.44


>>> conn.run('1 2 NULL')

array([  1.,   2.,  nan])


>>> z= conn.run("dict(1 2 3, `IBM`MSFT`GOOG)")

>>> z

{1: 'IBM', 2: 'MSFT', 3: 'GOOG'}

>>> z[1]

'IBM'


>>> conn.run('(1..6).reshape(3:2)')

(array([[1, 4],

      [2, 5],

      [3, 6]]), None, None)


>>> table_str = "n=20000\n"

>>> table_str += "t1=table(09:30:00+rand(18000, n) as timestamp, rand(100, n) as id, 100*(1+rand(100, n)) as qty, 5.0+rand(100.0,n) as price);\n"

>>> table_str += "select * from t1 where price>9"                                                  

>>> df = conn.run(table_str)

>>> df

     timestamp  id    qty       price

0      13:50:29   8    800   78.039556

1      09:45:13  39   6000   36.624369

2      09:34:39  45   5000   11.116828

3      11:54:26  26   8700   69.652368

4      12:07:52  20   9600   84.086700

...         ...  ..    ...         ...

19222  14:25:47  26   5500   78.223695

19223  13:08:36  49   7500  100.907622

19224  11:07:52  60   9500   57.600072

19225  11:47:36  31   9600   12.589440

19226  10:41:01  93    500   41.886152

[19227 rows x 4 columns]


>>> df.columns

Index([u'timestamp', u'id', u'qty', u'price'], dtype='object')





>>> dt = conn.table(data={'sym': ['A', 'B', 'B', 'A', 'A'], 'vol': [1, 3, 2, 5, 4], 'price': [16, 31, 28, 19, 22]})


>>> dt.tableName()

'Tb2e146a8'



请注意,每次使用tableName方法都会得到不同的表名,但是在下列例子中,表名保持不变。




>>> dt.showSQL()

'select price,sym,vol from Tb2e146a8'


>>> dt['price'].showSQL()

'select price from Tb2e146a8'


>>> dt.select('price').showSQL()

'select price from Tb2e146a8'


>>> dt.select("price").where(dt.price > 20).showSQL()

'select price from Tb2e146a8 where (price > 20)'


>>> dt.select("price").where(dt.price > 20).sort('price').showSQL()

'select price from Tb2e146a8 where (price > 20) order by price'


>>> dt.select(['sym','vol']).where(dt.price > 20).sort(['sym','vol desc']).showSQL()

'select sym,vol from Tb2e146a8 where (price > 20) order by sym,vol desc'


>>> dt[dt.price > 20].showSQL()

'select price,sym,vol from Tb2e146a8 where (price > 20)'





>>> dt.toDF()

  price sym  vol

0     16   A    1

1     31   B    3

2     28   B    2

3     19   A    5

4     22   A    4


>>> dt['price'].toDF()

  price

0     16

1     31

2     28

3     19

4     22


>>> dt.select('price').toDF()

  price

0     16

1     31

2     28

3     19

4     22


>>> dt.select("price").where(dt.price > 20).toDF()

  price

0     31

1     28

2     22


>>> dt.select("price").where(dt.price > 20).sort('price').toDF()

  price

0     22

1     28

2     31


>>> dt.select(['sym','vol']).where(dt.price > 20).sort(['sym','vol desc']).toDF()

 sym  vol

0   A    4

1   B    3

2   B    2


>>> dt[dt.price > 20].toDF()

  price sym  vol

0     31   B    3

1     28   B    2

2     22   A    4





>>> dt[dt.price > 20].groupby('sym').agg('sum').showSQL()

'select sum(price),sum(vol) from Tb2e146a8 where (price > 20) group by sym'


>>> dt[dt.price > 20].groupby('sym').agg('sum').toDF()

 sym  sum_price  sum_vol

0   B         59        5

1   A         22        4


>>> dt.groupby('sym').agg({'vol':[ddb.sum2]}).showSQL()

'select sum2(vol) from Tb2e146a8 group by sym'


>>> dt.groupby('sym').agg({'vol':[ddb.sum2]}).toDF()

 sym  sum2_vol

0   A        42

1   B        13


>>> dt.groupby('sym').agg2([ddb.wsum, ddb.wavg], [("price", "vol")]).showSQL()

'select wavg(price,vol),wsum(price,vol) from Tb2e146a8 group by sym'


>>> dt.groupby('sym').agg2([ddb.wsum, ddb.wavg], [("price", "vol")]).toDF()

 sym  wavg_price  wsum_price

0   A        19.9         199

1   B        29.8         149





>>> dt.contextby('sym').agg({'price':[ddb.sum]}).showSQL()

'select sum(price),sym from Tb2e146a8 context by sym'


>>> dt.contextby('sym').agg({'price':[ddb.sum]}).toDF()

  sum_price sym

0         57   A

1         57   A

2         57   A

3         59   B

4         59   B


>>> dt.contextby('sym').agg({'price':[ddb.cumsum]}).showSQL()

'select cumsum(price),sym from Tb2e146a8 context by sym'


>>> dt.contextby('sym').agg({'price':[ddb.cumsum]}).toDF()

  cumsum_price sym

0            16   A

1            35   A

2            57   A

3            31   B

4            59   B





>>> dt.pivotby(index='vol', column='sym', value='price').showSQL()

'select price from Tb2e146a8 pivot by vol,sym'


>>> dt.pivotby(index='vol', column='sym', value='price').toDF()

  vol     A     B

0    1  16.0   NaN

1    2   NaN  28.0

2    3   NaN  31.0

3    4  22.0   NaN

4    5  19.0   NaN





>>> dt1 = conn.table(data={'sym': ['AAPL', 'AAPL', 'AAPL', 'AMZN', 'AMZN'], 'date': ['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05', '2017-01-06'], 'price': [122, 125, 123, 756, 767]})

>>> dt2 = conn.table(data={'sym': ['AAPL', 'AAPL', 'AMZN', 'AMZN', 'AMZN'], 'date': ['2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-10'], 'vol': [1100, 1400, 1600, 2200, 1800]})


>>> dt1.toDF();

        date  price   sym

0  2017-01-02    122  AAPL

1  2017-01-03    125  AAPL

2  2017-01-04    123  AAPL

3  2017-01-05    756  AMZN

4  2017-01-06    767  AMZN


>>> dt2.toDF();

        date   sym   vol

0  2017-01-04  AAPL  1100

1  2017-01-05  AAPL  1400

2  2017-01-06  AMZN  1600

3  2017-01-07  AMZN  2200

4  2017-01-10  AMZN  1800


>>> dt1.merge(dt2, left_on=['sym','date'], right_on=['sym','date']).showSQL()

'select Tc288e78d.date as rhs_date,Tc288e78d.sym as rhs_sym,Tc288e78d.vol as rhs_vol,Td8323cd0.date as lhs_date,Td8323cd0.price as lhs_price,Td8323cd0.sym as lhs_sym from ej(Td8323cd0,Tc288e78d,`sym`date,`sym`date)'


>>> dt1.merge(dt2, left_on=['sym','date'], right_on=['sym','date']).toDF()

    lhs_date  lhs_price lhs_sym    rhs_date rhs_sym  rhs_vol

0  2017-01-04        123    AAPL  2017-01-04    AAPL     1100

1  2017-01-06        767    AMZN  2017-01-06    AMZN     1600


>>> dt1.merge(dt2, left_on=['sym','date'], right_on=['sym','date'], how='left').showSQL()

'select Tc288e78d.date as rhs_date,Tc288e78d.sym as rhs_sym,Tc288e78d.vol as rhs_vol,Td8323cd0.date as lhs_date,Td8323cd0.price as lhs_price,Td8323cd0.sym as lhs_sym from lj(Td8323cd0,Tc288e78d,`sym`date,`sym`date)'


>>> dt1.merge(dt2, left_on=['sym','date'], right_on=['sym','date'], how='left').toDF()

    lhs_date  lhs_price lhs_sym    rhs_date rhs_sym  rhs_vol

0  2017-01-02        122    AAPL                          NaN

1  2017-01-03        125    AAPL                          NaN

2  2017-01-04        123    AAPL  2017-01-04    AAPL   1100.0

3  2017-01-05        756    AMZN                          NaN

4  2017-01-06        767    AMZN  2017-01-06    AMZN   1600.0





>>> dt1 = conn.table(data={'id': ['A','A','A','B','B'], 'date': pd.to_datetime(['2017-02-06', '2017-02-08', '2017-02-10', '2017-02-07', '2017-02-09']), 'price': [22, 23, 20, 100, 102]})

>>> dt2 = conn.table(data={'id': ['A','A','B','B','B'], 'date': pd.to_datetime(['2017-02-07', '2017-02-10', '2017-02-07', '2017-02-08', '2017-02-10'])})


>>> dt1.toDF()

                           date id  price

0  2017.02.06T00:00:00.000000000  A     22

1  2017.02.08T00:00:00.000000000  A     23

2  2017.02.10T00:00:00.000000000  A     20

3  2017.02.07T00:00:00.000000000  B    100

4  2017.02.09T00:00:00.000000000  B    102


>>> dt2.toDF()

                           date id

0  2017.02.07T00:00:00.000000000  A

1  2017.02.10T00:00:00.000000000  A

2  2017.02.07T00:00:00.000000000  B

3  2017.02.08T00:00:00.000000000  B

4  2017.02.10T00:00:00.000000000  B


>>> dt2.merge_asof(dt1, on=['id','date']).showSQL()

'select T38907df4.date as lhs_date,T38907df4.id as lhs_id,T97b7d897.date as rhs_date,T97b7d897.price as rhs_price,T97b7d897.id as rhs_id from aj(T38907df4,T97b7d897,`id`date,`id`date)'


>>> dt2.merge_asof(dt1, on=['id','date']).toDF()

                       lhs_date lhs_id                       rhs_date  rhs_price rhs_id

0  2017.02.07T00:00:00.000000000      A  2017.02.06T00:00:00.000000000         22      A

1  2017.02.10T00:00:00.000000000      A  2017.02.10T00:00:00.000000000         20      A

2  2017.02.07T00:00:00.000000000      B  2017.02.07T00:00:00.000000000        100      B

3  2017.02.08T00:00:00.000000000      B  2017.02.07T00:00:00.000000000        100      B

4  2017.02.10T00:00:00.000000000      B  2017.02.09T00:00:00.000000000        102      B

 



Download source code here.