Components of the DataONE Infrastructure

The following diagrams indicate code dependencies between various components that make up the DataONE infrastructure. Functional dependencies are not depicted.

Common Components

Figure 1. Core shared components.

digraph core_components {
   // Figure 1: code dependencies among the core shared components.
   // Each edge points from a component to the component it depends on.

   // Graph-level label font and outline colour.
   fontname = "Bitstream Vera Sans";
   fontsize = 8;

   // Defaults applied to every edge unless overridden on the edge itself.
   edge [
     fontname = "Bitstream Vera Sans"
     fontsize = 8
     color = "#888888"
     arrowhead = "open"
     arrowsize = 0.5
     style="solid"
     ];

   // Defaults applied to every node.
   node [
     fontname = "Courier"
     fontsize = 8
     fontcolor = "black"];

   color="#888888";

   // The schema node is labelled with its file name and links to the
   // "#dataonetypes" anchor.
   schema [label="dataonetypes.xsd" URL="#dataonetypes"];
   d1_common_java;
   d1_common_python;
   d1_libclient_java;
   d1_libclient_python;
   d1_architecture;
   d1_jibx_extensions;
   d1_test_resources;
   // Grey-filled nodes (presumably third-party libraries -- confirm).
   // For nodes the fill colour is set with "fillcolor"; "bgcolor" is only
   // honoured on graphs and clusters, so it had no effect here.
   foresite_java [style="filled" fillcolor="grey80"];
   foresite_python [style="filled" fillcolor="grey80"];

   // Edge styles: solid is the default; dashed edges all target
   // d1_test_resources and dotted edges all target d1_architecture
   // (presumably test-only and documentation dependencies -- confirm).
   d1_architecture -> schema;
   d1_common_java -> schema;
   d1_common_java -> d1_jibx_extensions;
   d1_common_java -> d1_test_resources [style="dashed"];
   d1_common_java -> d1_architecture [style="dotted"];
   d1_common_python -> schema;
   d1_common_python -> d1_architecture [style="dotted"];
   d1_libclient_java -> d1_common_java;
   d1_libclient_java -> d1_test_resources [style="dashed"];
   d1_libclient_java -> d1_architecture [style="dotted"];
   d1_libclient_java -> foresite_java;
   d1_libclient_python -> d1_common_python;
   d1_libclient_python -> d1_architecture [style="dotted"];
   d1_libclient_python -> foresite_python;

}

NEEDS VERIFYING

Integration Test Tools

Figure 2. Test services.

digraph core_components {
    // Figure 2: integration test tools and the shared components they
    // depend on. Each edge points from a component to its dependency.

    color="#888888";
    fontname = "Bitstream Vera Sans";
    fontsize = 8;

    // Defaults applied to every edge unless overridden on the edge itself.
    edge [
      fontname = "Bitstream Vera Sans"
      fontsize = 8
      color = "#888888"
      arrowhead = "open"
      arrowsize = 0.5
      style="solid"
      ];

    // Defaults applied to every node.
    node [
      fontname = "Courier"
      fontsize = 8
      fontcolor = "black"];

    // The shared components, drawn as one labelled cluster.
    subgraph cluster_CORE {
      label="Shared Components";
      color="#888888";

      schema [label="dataonetypes.xsd"];
      d1_common_java;
      d1_common_python;
      d1_libclient_java;
      d1_libclient_python;
      d1_architecture;
      d1_jibx_extensions;
      d1_test_resources;
      // Grey-filled nodes (presumably third-party libraries -- confirm).
      // "fillcolor" is the node fill attribute; "bgcolor" is only honoured
      // on graphs and clusters, so it had no effect on these nodes.
      foresite_java [style="filled" fillcolor="grey80"];
      foresite_python [style="filled" fillcolor="grey80"];

      d1_architecture -> schema;
      d1_common_java -> schema;
      d1_common_java -> d1_jibx_extensions;
      d1_common_java -> d1_test_resources [style="dashed"];
      d1_common_java -> d1_architecture [style="dotted"];
      d1_common_python -> schema;
      d1_common_python -> d1_architecture [style="dotted"];
      d1_libclient_java -> d1_common_java;
      d1_libclient_java -> d1_test_resources [style="dashed"];
      d1_libclient_java -> d1_architecture [style="dotted"];
      d1_libclient_java -> foresite_java;
      d1_libclient_python -> d1_common_python;
      d1_libclient_python -> d1_architecture [style="dotted"];
      d1_libclient_python -> foresite_python;
    }

    // Test tools. d1_echo_service is declared but has no edges in this graph.
    d1_integration;
    d1_web_test_site;
    d1_instance_generator;
    d1_echo_service;

    // "Certificates" is not declared above; Graphviz creates the node
    // implicitly from this edge.
    d1_integration -> d1_common_java;
    d1_integration -> d1_libclient_java;
    d1_integration -> Certificates;

    d1_web_test_site -> d1_integration;

    d1_instance_generator -> d1_common_python;
    d1_instance_generator -> d1_libclient_python;
}

Coordinating Node Components

Figure 3. Coordinating node components.

digraph core_components {
   // Figure 3: Coordinating Node components and the shared components and
   // services they depend on. Each edge points from a component to its
   // dependency.

   color="#888888";
   fontname = "Bitstream Vera Sans";
   fontsize = 10;

   // Defaults applied to every edge unless overridden on the edge itself.
   edge [
     fontname = "Bitstream Vera Sans"
     fontsize = 8
     color = "#888888"
     arrowhead = "open"
     arrowsize = 0.5
     style="solid"
     ];

   // Defaults applied to every node.
   node [
     fontname = "Courier"
     fontsize = 10
     fontcolor = "black"];

   // The shared components, drawn as one labelled cluster.
   subgraph cluster_CORE {
     label="Shared Components";
     color="#888888";

     schema [label="dataonetypes.xsd"];
     d1_common_java;
     d1_common_python;
     d1_libclient_java;
     d1_libclient_python;
     d1_architecture;
     d1_jibx_extensions;
     d1_test_resources;
     // "fillcolor" is the node fill attribute; "bgcolor" is only honoured
     // on graphs and clusters, so it had no effect on these nodes.
     foresite_java [style="filled" fillcolor="grey80"];
     foresite_python [style="filled" fillcolor="grey80"];

     d1_architecture -> schema;
     d1_common_java -> schema;
     d1_common_java -> d1_jibx_extensions;
     d1_common_java -> d1_test_resources [style="dashed"];
     d1_common_java -> d1_architecture [style="dotted"];
     d1_common_python -> schema;
     d1_common_python -> d1_architecture [style="dotted"];
     d1_libclient_java -> d1_common_java;
     d1_libclient_java -> d1_test_resources [style="dashed"];
     d1_libclient_java -> d1_architecture [style="dotted"];
     d1_libclient_java -> foresite_java;
     d1_libclient_python -> d1_common_python;
     d1_libclient_python -> d1_architecture [style="dotted"];
     d1_libclient_python -> foresite_python;
   }


   // Grey-filled nodes (presumably external services and third-party
   // software -- confirm). All of them now use "fillcolor", matching the
   // postgres node, so the grey80 fill is applied consistently.
   postgres [style="filled" fillcolor="grey80"];
   hazelcast [style="filled" fillcolor="grey80"];
   LDAP [style="filled" fillcolor="grey80"];
   SOLR [style="filled" fillcolor="grey80"];
   cilogon_portal_servlet [style="filled" fillcolor="grey80" label="cilogon-portal-servlet"];

   // Coordinating Node components. d1_cn_version_tool is declared but has
   // no edges in this graph.
   d1_cn_version_tool;
   d1_cn_common;
   d1_cn_index_common;
   d1_cn_index_generator;
   d1_cn_noderegistry;
   d1_mercury_common;
   d1_portal;
   d1_cn_approve_node;
   d1_identity_manager;
   d1_log_aggregation;
   d1_mercury_ui;
   d1_portal_servlet;
   d1_cn_rest;
   d1_cn_rest_proxy;
   d1_cn_service;
   d1_solr_extensions;
   d1_synchronization;
   d1_cn_index_processor;
   d1_replication;
   d1_cn_index_tool;
   d1_process_daemon;
   cn_metacat;

   // Dependencies, grouped by the depending component.
   d1_cn_common -> d1_common_java;
   d1_cn_common -> hazelcast;

   d1_cn_index_common -> d1_common_java;
   d1_cn_index_common -> postgres;

   d1_cn_index_generator -> d1_cn_index_common;
   d1_cn_index_generator -> d1_common_java;
   d1_cn_index_generator -> d1_cn_common;
   d1_cn_index_generator -> postgres;
   d1_cn_index_generator -> hazelcast;

   d1_cn_noderegistry -> d1_cn_common;
   d1_cn_noderegistry -> LDAP;
   d1_cn_noderegistry -> hazelcast;

   d1_mercury_common -> d1_common_java;
   d1_mercury_common -> d1_libclient_java;
   d1_mercury_common -> d1_portal;

   d1_portal -> d1_common_java;
   d1_portal -> d1_libclient_java;

   d1_cn_approve_node -> d1_cn_noderegistry;
   d1_cn_approve_node -> d1_libclient_java;

   d1_identity_manager -> d1_libclient_java;
   d1_identity_manager -> d1_cn_noderegistry;

   d1_log_aggregation -> d1_libclient_java;
   d1_log_aggregation -> d1_cn_noderegistry;

   d1_mercury_ui -> d1_common_java;
   d1_mercury_ui -> d1_libclient_java;
   d1_mercury_ui -> d1_portal;

   d1_portal_servlet -> cilogon_portal_servlet;
   d1_portal_servlet -> d1_portal;
   d1_portal_servlet -> hazelcast;

   d1_cn_rest_proxy -> d1_cn_common;

   d1_cn_rest -> d1_libclient_java;
   d1_cn_rest -> d1_identity_manager;
   d1_cn_rest -> d1_portal;
   d1_cn_rest -> d1_cn_rest_proxy;
   d1_cn_rest -> d1_cn_noderegistry;
   d1_cn_rest -> d1_cn_common;

   d1_cn_service -> d1_cn_rest;
   d1_cn_service -> d1_cn_rest_proxy;

   d1_solr_extensions -> d1_libclient_java;
   d1_solr_extensions -> d1_cn_common;
   d1_solr_extensions -> d1_identity_manager;
   d1_solr_extensions -> d1_cn_noderegistry;
   d1_solr_extensions -> d1_portal;

   d1_synchronization -> d1_libclient_java;
   d1_synchronization -> d1_cn_noderegistry;
   d1_synchronization -> d1_identity_manager;
   d1_synchronization -> hazelcast;

   d1_cn_index_processor -> d1_cn_common;
   d1_cn_index_processor -> d1_libclient_java;
   d1_cn_index_processor -> d1_cn_index_common;
   d1_cn_index_processor -> d1_cn_index_generator;
   d1_cn_index_processor -> postgres;
   d1_cn_index_processor -> SOLR;

   d1_replication -> d1_cn_noderegistry;
   d1_replication -> d1_cn_common;
   d1_replication -> d1_libclient_java;
   d1_replication -> hazelcast;

   d1_cn_index_tool -> d1_common_java;
   d1_cn_index_tool -> d1_libclient_java;
   d1_cn_index_tool -> d1_cn_common;
   d1_cn_index_tool -> d1_cn_index_common;
   d1_cn_index_tool -> d1_cn_index_generator;
   d1_cn_index_tool -> d1_cn_index_processor;

   d1_process_daemon -> d1_synchronization;
   d1_process_daemon -> d1_log_aggregation;
   d1_process_daemon -> d1_replication;
   d1_process_daemon -> hazelcast;

   cn_metacat -> d1_common_java;
   cn_metacat -> d1_libclient_java;
   cn_metacat -> postgres;
   cn_metacat -> hazelcast;
   cn_metacat -> foresite_java;

}

Member Node Components and Instances

Figure 4. Member node implementations (ovals) and instances (rectangles).

NEEDS UPDATING

digraph core_components {
    // Figure 4: Member Node implementations (default oval nodes) and
    // Member Node instances (shape="record", drawn as rectangles), with the
    // shared components they depend on. Each edge points from a node to the
    // thing it depends on or is built from.

    color="#888888";
    fontname = "Bitstream Vera Sans";
    fontsize = 8;

    // Defaults applied to every edge unless overridden on the edge itself.
    edge [
      fontname = "Bitstream Vera Sans"
      fontsize = 8
      color = "#888888"
      arrowhead = "open"
      arrowsize = 0.5
      style="solid"
      ];

    // Defaults applied to every node.
    node [
      fontname = "Courier"
      fontsize = 8
      fontcolor = "black"];

    // The shared components, drawn as one labelled cluster.
    subgraph cluster_CORE {
      label="Shared Components";
      color="#888888";

      schema [label="dataonetypes.xsd"];
      d1_common_java;
      d1_common_python;
      d1_libclient_java;
      d1_libclient_python;
      d1_architecture;
      // "fillcolor" is the node fill attribute; "bgcolor" is only honoured
      // on graphs and clusters, so it had no effect on these nodes.
      foresite_java [style="filled" fillcolor="grey80"];
      foresite_python [style="filled" fillcolor="grey80"];

      // d1_jibx_extensions and d1_test_resources are not declared above;
      // Graphviz creates them implicitly from the edges below.
      d1_architecture -> schema;
      d1_common_java -> schema;
      d1_common_java -> d1_jibx_extensions;
      d1_common_java -> d1_test_resources [style="dashed"];
      d1_common_java -> d1_architecture [style="dotted"];
      d1_common_python -> schema;
      d1_common_python -> d1_architecture [style="dotted"];
      d1_libclient_java -> d1_common_java;
      d1_libclient_java -> d1_test_resources [style="dashed"];
      d1_libclient_java -> d1_architecture [style="dotted"];
      d1_libclient_java -> foresite_java;
      d1_libclient_python -> d1_common_python;
      d1_libclient_python -> d1_architecture [style="dotted"];
      d1_libclient_python -> foresite_python;
    }

    // Member Node implementations (GMN is created implicitly by its edges).
    Metacat;
    Mercury_MN;

    // Member Node instances.
    KNB [shape="record"];
    ORNL_DAAC [shape="record"];
    USGS [shape="record"];
    Dryad [shape="record"];
    Merritt [shape="record"];
    SAN_Parks [shape="record"];
    MN_Replication_UNM_1 [shape="record"];
    MN_Replication_UNM_2 [shape="record"];
    MN_Replication_UCSB_1 [shape="record"];
    MN_Replication_UCSB_2 [shape="record"];
    MN_Replication_ORC_1 [shape="record"];
    MN_Replication_ORC_2 [shape="record"];

    // Metacat and the instances that point to it.
    Metacat -> d1_common_java;
    Metacat -> d1_libclient_java;
    KNB -> Metacat;
    SAN_Parks -> Metacat;
    Merritt -> Metacat;

    // Mercury_MN and the instances that point to it.
    Mercury_MN -> d1_common_java;
    ORNL_DAAC -> Mercury_MN;
    USGS -> Mercury_MN;

    // GMN and the replication instances that point to it.
    GMN -> d1_common_python;
    GMN -> d1_libclient_python;
    MN_Replication_UNM_1 -> GMN;
    MN_Replication_UCSB_1 -> GMN;
    MN_Replication_ORC_1 -> GMN;

    // Replication instances that point to Metacat.
    MN_Replication_UNM_2 -> Metacat;
    MN_Replication_UCSB_2 -> Metacat;
    MN_Replication_ORC_2 -> Metacat;

    // Dryad depends on the Java libraries directly.
    Dryad -> d1_common_java;
    Dryad -> d1_libclient_java;
}

Investigator Toolkit Components

Figure 5. Investigator toolkit.

NEEDS UPDATING

digraph core_components {
    // Figure 5: Investigator Toolkit clients and the shared components they
    // depend on. Each edge points from a component to its dependency.

    color="#888888";
    fontname = "Bitstream Vera Sans";
    fontsize = 8;

    // Defaults applied to every edge unless overridden on the edge itself.
    edge [
      fontname = "Bitstream Vera Sans"
      fontsize = 8
      color = "#888888"
      arrowhead = "open"
      arrowsize = 0.5
      style="solid"
      ];

    // Defaults applied to every node.
    node [
      fontname = "Courier"
      fontsize = 8
      fontcolor = "black"];

    // The shared components, drawn as one labelled cluster.
    subgraph cluster_CORE {
      label="Shared Components";
      color="#888888";

      schema [label="dataonetypes.xsd"];
      d1_common_java;
      d1_common_python;
      d1_libclient_java;
      d1_libclient_python;
      d1_architecture;
      // "fillcolor" is the node fill attribute; "bgcolor" is only honoured
      // on graphs and clusters, so it had no effect on these nodes.
      foresite_java [style="filled" fillcolor="grey80"];
      foresite_python [style="filled" fillcolor="grey80"];

      // d1_jibx_extensions and d1_test_resources are not declared above;
      // Graphviz creates them implicitly from the edges below.
      d1_architecture -> schema;
      d1_common_java -> schema;
      d1_common_java -> d1_jibx_extensions;
      d1_common_java -> d1_test_resources [style="dashed"];
      d1_common_java -> d1_architecture [style="dotted"];
      d1_common_python -> schema;
      d1_common_python -> d1_architecture [style="dotted"];
      d1_libclient_java -> d1_common_java;
      d1_libclient_java -> d1_test_resources [style="dashed"];
      d1_libclient_java -> d1_architecture [style="dotted"];
      d1_libclient_java -> foresite_java;
      d1_libclient_python -> d1_common_python;
      d1_libclient_python -> d1_architecture [style="dotted"];
      d1_libclient_python -> foresite_python;
    }

    // Investigator Toolkit clients.
    d1_client_cli;
    d1_client_r;
    d1_client_fuse;
    d1_client_dokan;

    // The CLI and FUSE clients depend on the Python libraries, the R client
    // on the Java libraries, and the Dokan client on the FUSE client.
    d1_client_cli -> d1_common_python;
    d1_client_cli -> d1_libclient_python;
    d1_client_r -> d1_common_java;
    d1_client_r -> d1_libclient_java;
    d1_client_fuse -> d1_common_python;
    d1_client_fuse -> d1_libclient_python;
    d1_client_dokan -> d1_client_fuse;
}

Table of Components

Components
Component Category Responsible Description
‘Component’ ‘Category’ ‘Responsible’ ‘Description’
‘dataonetypes’ ‘Common’ ‘MJ’ ‘Schema used for defining serialization of core data types’
‘d1_architecture’ ‘Common’ ‘DV’ ‘The system architecture documentation’
‘operations’ ‘Operations’ ‘DV’ ‘Operations documentation - servers etc’
‘d1_common_java’ ‘Common’ ‘RW’ ‘Base DataONE library in Java’
‘d1_common_python’ ‘Common’ ‘RD’ ‘Base DataONE library in Python’
‘d1_libclient_java’ ‘Common’ ‘RN’ ‘Client library implemented in Java’
‘d1_libclient_python’ ‘Common’ ‘RD’ ‘Client library implemented in Python’
‘d1_web_test_site’ ‘Testing’ ‘RN’ ‘Member node integration testing service’
‘d1_echo_service’ ‘Testing’ ‘DV’ ‘A HTTP echo service used for testing’
‘d1_integration’ ‘Testing’ ‘RN’ ‘Integration testing for components and combinations thereof’
‘Certificates’ ‘Testing’ ‘RW’ ‘Generation and management of certificates for use by server components’
‘d1_instance_generator’ ‘Testing’ ‘RD’ ‘Generates example instances of objects defined in dataoneTypes.xsd’
‘one_mercury’ ‘CN’ ‘GP’ ‘The search interface that is implemented by the Mercury search index’
‘cn_metacat’ ‘CN’ ‘CJ’ ‘The Metacat application. Currently employed as the replicated object store on Coordinating Nodes.’
‘d1_cn_index_processor’ ‘CN’ ‘SR/DV’ ‘Populates the SOLR index by extracting informaton from system metadata, science metadata and resource maps.’
‘d1_cn_index_generator’ ‘CN’ ‘SR/DV’ ‘Generates indexing tasks when new objects appear or system metadata changes’
‘d1_cn_index_common’ ‘CN’ ‘SR/DV’ ‘Code shared between the indexing components’
‘indexerapi’ ‘CN’ ‘SR/DV’ ‘A library used by the index_processor for extracting content from various types of XML structures such as system metadata, science metadata and resource maps. ‘
‘d1_portal_servlet’ ‘CN’ ‘BL’ ‘Provides a UI for interacting with the CILogon service, an authentication proxy service’
‘d1_portal’ ‘CN’ ‘BL’ ‘Implements the certificate manager used by the portal servlets’
‘d1_identity_manager’ ‘CN’ ‘BL’ ‘Provides mechanisms for managing subjects in dataone’
‘d1_process_daemon’ ‘CN’ ‘RW’ ‘Monitors content on member nodes, creating tasks for synchronization and replication’
‘d1_synchronization’ ‘CN’ ‘RW’ ‘Manages the synchronization of content between Member Nodes and the Coordinating Nodes.’
‘d1_replication’ ‘CN’ ‘CJ’ ‘Manages replication of content between Member Nodes’
‘d1_cn_noderegistry’ ‘CN’ ‘RW’ ‘A register of coordinating and member nodes participating in a DataONE environment’
‘d1_cn_common’ ‘CN’ ‘RW’ ‘A library of code shared between coordintating node components’
‘d1_cn_rest’ ‘CN’ ‘RW’ ‘The coordinating node HTTP REST service interface’
‘d1_cn_rest_proxy’ ‘CN’ ‘RW’ ‘Proxies requests coming in to a CN to underlying service implementations such as the object store (i.e. Metacat)’
‘d1_cn_service’ ‘CN’ ‘RW’ ‘Coordinating node service, implementing the service APIs, data storage, and CN replication.’
‘d1_simple_search’ ‘CN’ ‘DV’ ‘A simple search interface using Javascript and the SOLR interface.’
‘Metacat’ ‘MN’ ‘CJ’ ‘The Metacat application. Implements the DataONE MN service interfaces.’
‘Dryad’ ‘MN’ ‘RS’ ‘A member node implementation and instance for the Dryad repository’
‘GMN’ ‘MN’ ‘RD’ ‘A generic, standalone Member Node implementation written in Python using the Django framework.’
‘Mercury_MN’ ‘MN’ ‘JG’ ‘Mercury implementation of the Member Node services’
‘d1_client_cli’ ‘ITK’ ‘RD’ ‘A command line client for interacting with the DataONE infrastructure. Currently implemented using d1_libclient_python.’
‘d1_client_fuse’ ‘ITK’ ‘DV’ ‘A FUSE driver for mounting the DataONE infrastructure as a file system.’
‘d1_client_dokan’ ‘ITK’ ‘DV’ ‘An extention of the FUSE driver that is based on Dokan for use on Microsoft Windows systems.’
‘d1_client_r’ ‘ITK’ ‘MJ’ ‘A plugin for R that enables access to DataONE content from the R application. Implemented using d1_libclient_java.’
‘hzpeek’ ‘Testing’ ‘DV’ ‘A tool for examining the Hazelcast queues on the CNs’