Skip to content

Schema for Data Generation

This schema defines the YAML input file used to configure data generation.

Schema:

### Schema for the YAML configuration used to run the CLI script

# Pipeline stages to execute, given as a list of stage names.
# NOTE(review): the rule names (type/required/allowed) look like Cerberus
# validation rules, in which case every list item must be one of the names
# under `allowed` -- confirm against the validator that loads this schema.
stages:                 ### ANCHOR: Required stages to run
  type: list
  required: True
  allowed:
  - build_structure     # build the atomic structures
  - optimize_structure  # optimize the structure
  - normalize_cell      # normalize the cell to an upper/lower triangular matrix
  - sampling_space      # explore the sampling space
  - run_dft             # run the DFT single-point/AIMD simulation
  - collect_data        # collect the data


# Atomic structure definition: either read from existing EXTXYZ files
# (`from_extxyz`) or built from parameters (`from_scratch`).
structure:              ### ANCHOR: Define atomic structure
  type: dict
  required: True
  schema:
    from_extxyz:        # list of paths to the EXTXYZ files used as the initial structure. If provided, the structure is read from these files and the other structure parameters are ignored.
      type: list

    from_scratch:       # parameters for building the structure from scratch
      type: dict
      schema:
        structure_type: # e.g., bulk, molecule, surface
          type: string
        chem_formula:   # chemical formula/element, e.g., "H2O", "Mg2O2", "Mg"
          type: string
        supercell:      # size of the supercell
          type: list
        pbc:            # NOTE(review): presumably periodic-boundary-condition flags -- confirm the expected list contents
          type: list
        add_vacuum:     # add vacuum space in each dimension x, y, z. Note: the vacuum is the total over both sides.
          type: list
        ase_arg:        # ASE kwargs for building the structure. https://wiki.fysik.dtu.dk/ase/ase/build/build.html#
          type: dict
          required: True
          allow_unknown: True

          # Example keys (kept commented out; they are not part of the schema):
          # crystalstructure: "bcc" # choices: sc,fcc,bcc,tetragonal,bct,hcp,rhombohedral,orthorhombic,mcl,diamond,zincblende,rocksalt,cesiumchloride,fluorite,wurtzite.
          # a: 3.98  # lattice constant
          # # cubic: True

    make_triangular_form:     # normalize the cell to an upper/lower triangular matrix, to be able to use the NPT ensemble
      type: string
      allowed: ['upper', 'lower']


sampling_space:         # sampling `scaled structures` over ranges of temperatures; or sampling `original structures` over ranges of temperatures and pressures (stresses).
  type: dict
  schema:
    scale:              # sampling of scaled structures
      type: dict
      schema:
        scale_x:        # scale the structure in x-direction. E.g., [0.5, 1.5]
          type: list
        scale_y:        # scale the structure in y-direction. E.g., [0.5, 1.5]
          type: list
        scale_z:        # scale the structure in z-direction. E.g., [0.5, 1.5]
          type: list
        temps:          # list of temperatures in K. E.g., [300, 600]
          type: list

    temp_press:        # sampling in temperature and pressure (stress)
      type: dict
      schema:
        temps:          # list of temperatures in K
          type: list
        pressures:       # list of pressures (stresses) in GPa
          type: list


dft:                    ### ANCHOR: DFT calculators
  type: dict
  schema:
    # NOTE(review): the comment below says all `ase.calc` keywords are accepted,
    # but `calc` declares only `gpaw` and `dftd3` and sets no `allow_unknown`;
    # whether other keys pass depends on the validator's settings -- confirm.
    calc:               # accepts all keywords for `ase.calc`; see https://thangckt.github.io/alff_doc/schema/config_ase/
      type: dict
      required: True
      schema:
        gpaw:           # GPAW calculator parameters (no sub-schema declared here)
          type: dict
        dftd3:          # DFT-D3 calculator for van der Waals correction (no sub-schema declared here)
          type: dict

    optimize:           # run DFT to optimize the structure (no sub-schema declared here)
      type: dict

    md:                 # run AIMD simulation (no sub-schema declared here)
      type: dict