当前位置:   article > 正文

python将csv数据导入neo4j_neo4j csv python

neo4j csv python

参考链接:https://github.com/jm199504/Financial-Knowledge-Graphs/tree/master

from pandas import DataFrame
from py2neo import Graph,Node,Relationship,NodeMatcher
import pandas as pd
import numpy as np
import os
# 连接Neo4j数据库
from py2neo import Graph, Node, Relationship, walk, NodeMatcher, RelationshipMatcher
import pandas as pd
import json
# 连接数据库 输入地址、用户名、密码
from py2neo import Graph

# Connect using a URI that embeds the user name and password.
# The NEO4J_URI environment variable, when set, overrides the endpoint and
# credentials so a real password does not have to live in the source; when it
# is unset the original local default is used unchanged.
uri = os.environ.get("NEO4J_URI", "http://neo4j:neo4j@localhost:7474")
graph = Graph(uri)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
# Build two sample Person nodes and write them to the graph one at a time.
a, b = Node('Person', name='Tom'), Node('Person', name='Bob')
for person in (a, b):
    graph.create(person)

# Sample relationship: Tom KNOWS Bob.
r = Relationship(a, 'KNOWS', b)
graph.create(r)

# Read back up to 25 Person nodes into a DataFrame.
node_cursor = graph.run('MATCH (n:`Person`) RETURN n LIMIT 25')
node = DataFrame(node_cursor)

# Read back every Person-to-Person relationship: both endpoints and its type.
relation_cursor = graph.run('MATCH (n:`Person`)-[r]->(m:`Person`) return n,m,type(r)')
relation = DataFrame(relation_cursor)

# Clear the graph: delete every node together with any relationship on it.
graph.run('MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r')
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19

(No data)

# Load the input tables.
# The first four files are GBK-encoded; the remaining three are read with
# pandas' default encoding. Files are read in the same order as listed.
stock, holder, concept_num, concept = (
    pd.read_csv(csv_path, encoding="gbk")
    for csv_path in (
        'stock_basic.csv',
        'stock_holders.csv',
        'concept.csv',
        'stock_concept.csv',
    )
)
sh, sz, corr = (
    pd.read_csv(csv_path) for csv_path in ('sh.csv', 'sz.csv', 'corr.csv')
)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
stock.head()
  • 1
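|   | Unnamed: 0 | TS代码 | 股票代码 | 股票名称 | 行业 |
|---|---|---|---|---|---|
| 0 | 0 | 000001.SZ | 1 | 平安银行 | 银行 |
| 1 | 1 | 000002.SZ | 2 | 万科A | 全国地产 |
| 2 | 2 | 000004.SZ | 4 | 国华网安 | 互联网 |
| 3 | 3 | 000005.SZ | 5 | 世纪星源 | 环境保护 |
| 4 | 4 | 000006.SZ | 6 | 深振业A | 区域地产 |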
holder.head()
  • 1
|   | Unnamed: 0 | ts_code | ann_date | end_date | holder_name | hold_amount | hold_ratio |
|---|---|---|---|---|---|---|---|
| 0 | 0 | 000001.SZ | 20190307 | 20181231 | 新华人寿保险股份有限公司-分红-个人分红-018L-FH002深 | 4.960350e+07 | 0.29 |
| 1 | 1 | 000001.SZ | 20190307 | 20181231 | 中国平安保险(集团)股份有限公司-集团本级-自有资金 | 8.510493e+09 | 49.56 |
| 2 | 2 | 000001.SZ | 20190307 | 20181231 | 中国平安人寿保险股份有限公司-自有资金 | 1.049463e+09 | 6.11 |
| 3 | 3 | 000001.SZ | 20190307 | 20181231 | 香港中央结算有限公司(陆股通) | 4.307515e+08 | 2.51 |
| 4 | 4 | 000001.SZ | 20190307 | 20181231 | 中国证券金融股份有限公司 | 4.292327e+08 | 2.50 |
concept_num.head()
  • 1
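|   | Unnamed: 0 | code | name | src |
|---|---|---|---|---|
| 0 | 0 | TS0 | 密集调研 | ts |
| 1 | 1 | TS1 | 南北船合并 | ts |
| 2 | 2 | TS2 | 5G | ts |
| 3 | 3 | TS3 | 机场 | ts |
| 4 | 4 | TS4 | 高价股 | ts |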
concept.head()
  • 1
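|   | Unnamed: 0 | id | concept_name | ts_code | name |
|---|---|---|---|---|---|
| 0 | 0 | TS0 | 密集调研 | 000301.SZ | 东方盛虹 |
| 1 | 1 | TS0 | 密集调研 | 000401.SZ | 冀东水泥 |
| 2 | 2 | TS0 | 密集调研 | 000932.SZ | 华菱钢铁 |
| 3 | 3 | TS0 | 密集调研 | 002013.SZ | 中航机电 |
| 4 | 4 | TS0 | 密集调研 | 002106.SZ | 莱宝高科 |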
sh.head()
  • 1
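|   | ts_code | hs_type | in_date | out_date | is_new |
|---|---|---|---|---|---|
| 0 | 601628.SH | SH | 20141117 | NaN | 1 |
| 1 | 601099.SH | SH | 20141117 | NaN | 1 |
| 2 | 601808.SH | SH | 20141117 | NaN | 1 |
| 3 | 601107.SH | SH | 20141117 | NaN | 1 |
| 4 | 601880.SH | SH | 20141117 | NaN | 1 |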
sz.head()
  • 1
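|   | ts_code | hs_type | in_date | out_date | is_new |
|---|---|---|---|---|---|
| 0 | 002910.SZ | SZ | 20171114 | NaN | 1 |
| 1 | 000016.SZ | SZ | 20180102 | NaN | 1 |
| 2 | 001872.SZ | SZ | 20180102 | NaN | 1 |
| 3 | 000040.SZ | SZ | 20180102 | NaN | 1 |
| 4 | 000401.SZ | SZ | 20180102 | NaN | 1 |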
corr.head()
  • 1
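|   | Unnamed: 0 | s1 | s2 | corr |
|---|---|---|---|---|
| 0 | 0 | 000001.SZ. | 000001.SZ. | 1.000000 |
| 1 | 1 | 000001.SZ. | 000002.SZ. | 0.648945 |
| 2 | 2 | 000001.SZ. | 000005.SZ. | 0.342920 |
| 3 | 3 | 000001.SZ. | 000009.SZ. | 0.297213 |
| 4 | 4 | 000001.SZ. | 000010.SZ. | 0.186165 |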
# Data preprocessing
# Stocks with no industry get the placeholder label '未知'.
stock['行业'] = stock['行业'].fillna('未知')
# Drop repeated shareholder records, keeping the first occurrence.
# The holder preview shows a leading "Unnamed: 0" column (presumably a saved
# row index) whose value differs on every row; comparing it would make every
# row unique and nothing would be dropped, so unnamed columns are excluded.
# When there are no such columns this is the same as comparing all columns.
dedup_columns = [
    column for column in holder.columns
    if not str(column).startswith('Unnamed')
]
holder = holder.drop_duplicates(subset=dedup_columns or None, keep='first')
  • 1
  • 2
  • 3
# Create entities (concepts, stocks, shareholders, Stock Connect programmes).
#
# Rows are read by column name instead of by position. The previews of the
# stock, holder, concept_num and concept tables all show a leading
# "Unnamed: 0" column, and positional access on the holder table was storing
# that row index as TS代码 and the two date columns as amount/ratio.

# The nodes are named *_node so that the `sz` / `sh` DataFrames loaded from
# sz.csv / sh.csv are not overwritten by the Node objects.
sz_node = Node('深股通', 名字='深股通')
graph.create(sz_node)

sh_node = Node('沪股通', 名字='沪股通')
graph.create(sh_node)

for row in concept_num.to_dict('records'):
    graph.create(Node('概念', 概念代码=row['code'], 概念名称=row['name']))

for row in stock.to_dict('records'):
    graph.create(
        Node('股票', TS代码=row['TS代码'], 股票名称=row['股票名称'], 行业=row['行业'])
    )

for row in holder.to_dict('records'):
    graph.create(
        Node(
            '股东',
            TS代码=row['ts_code'],
            股东名称=row['holder_name'],
            持股数量=row['hold_amount'],
            持股比例=row['hold_ratio'],
        )
    )

# Create relationships (shareholder -> stock, stock -> concept).
# Stock-announcement and stock-connect links are not built in this section.

matcher = NodeMatcher(graph)

# Visit each stock code once. The shareholder lookup already returns every
# 股东 node for that code, so looping over individual holder rows would walk
# the same set of shareholders once per row.
for ts_code in holder['ts_code'].unique():
    stock_node = matcher.match("股票", TS代码=ts_code).first()
    if stock_node is None:
        # Shareholder data for a stock that is not in the stock table.
        continue
    for holder_node in matcher.match("股东", TS代码=ts_code):
        graph.create(Relationship(holder_node, '参股', stock_node))
    print('TS', str(ts_code))

for row in concept.to_dict('records'):
    stock_node = matcher.match("股票", TS代码=row['ts_code']).first()
    concept_node = matcher.match("概念", 概念代码=row['id']).first()
    # Skip memberships whose stock or concept was never created as a node.
    if stock_node is None or concept_node is None:
        continue
    graph.create(Relationship(stock_node, '概念属于', concept_node))
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18

  • 1
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/繁依Fanyi0/article/detail/761034
推荐阅读
相关标签
  

闽ICP备14008679号