本篇文章简单介绍使用Python将文本数据导入MongoDB。 流程简示:
环境检查 →(未配置)→ 配置相关环境 → 连接MongoDB
环境检查 →(已配置)→ 连接MongoDB
连接MongoDB → 数据准备 → pandas处理数据 → 转为字典列表 → 导入数据库
Python 3.6.8
pip install pymongo安装pymongo包
pip install pandas安装pandas包
re 包:用于正则表达式处理字符串(此处用来自定义集合名)。re 是 Python 标准库模块,无需通过 pip 安装(`pip install re` 会失败),直接 `import re` 即可;若不需要自定义集合名,可忽略该模块。
filePath = r'D:\data\\' + fileName :拼接字符串,即拼接出完整路径地址; re.sub('.txt', '', fileName) :用正则表达式去除后缀 .txt(注意:正则中未转义的 . 可匹配任意字符,更严谨的写法是 re.sub(r'\.txt$', '', fileName));也可直接给集合命名(collection=collectionName),省略此操作; columns = [] :列名列表,即对应的字段名。
pd.set_option('display.max_columns', None) pandas展示数据时,当列数过多时,会隐藏中间部分列,此操作可全部展示。
pd.read_table() 参数介绍: filepath :文件路径; header=None : 默认会自动推断数据文件列名,如果设置为None则表示文件中没有列名行,为0则第一行是文件列名; names=columns :设置列名; sep='\!\^' :设置分隔符(此处为正则表达式,匹配字符串 !^;多字符或正则分隔符只有python引擎支持); usecols=[i for i in range(start,end)] :设置需要从文件读出的列,默认全部读出; engine="python" :默认是c引擎解析,如果使用python引擎,可以解析更丰富的内容,但当数据较多时,c引擎解析速度比python快很多; encoding="utf-8":设置文件编码,防止中文乱码。
注解: usecols=[i for i in range(start,end)]: 列表生成式,假设start=1,end=5,结果等同于[1,2,3,4] (迭代)。 也可替换为不连续列表,如:usecols=[2,5,6]
i :行号,此处未用。 row :可理解为键值列表,row[key] 表示取值。 {key:row[key] for key in columns} :字典生成式,同上述列表生成式。
db[collection]. 后跟任意MongoDB操作。